diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index d7938ef1d..5f4014107 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -233,441 +233,2440 @@ static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, 
r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * 
B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" 
- "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) 
&& (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, 
r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + 
"adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, 
r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr 
r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, 
r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else 
"umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" 
"adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + 
"adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) 
&& (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# A[7] * B[1]\n\t" + /* A[7] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[6] * B[2]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" 
+ "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, 
r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #32]\n\t" - "# A[2] * B[7]\n\t" + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + 
"lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[3] * B[6]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #36]\n\t" - "# A[7] * B[3]\n\t" + /* A[7] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[6] * B[4]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, 
r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str 
r4, [%[r], #40]\n\t" - "# A[4] * B[7]\n\t" + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[5] * B[6]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #44]\n\t" - "# A[7] * B[5]\n\t" + /* A[7] * B[5] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[6] * B[6]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #48]\n\t" - "# A[6] * B[7]\n\t" + /* A[6] * B[7] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[7] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #52]\n\t" - "# A[7] * B[7]\n\t" + /* A[7] * B[7] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" +#endif "str r5, [%[r], #56]\n\t" "str r3, [%[r], #60]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #32\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -678,34 +2677,30 @@ static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* Sub b from a into a. 
(a -= b) @@ -715,44 +2710,41 @@ static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, */ static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; 
} /* Add b to a into r. (r = a + b) @@ -761,48 +2753,44 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" 
(r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -877,72 +2865,69 @@ SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a, */ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - 
"sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -951,76 +2936,72 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" 
"adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -1099,128 +3080,125 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, */ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" 
+ "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, 
r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs 
r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -1229,132 +3207,128 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, 
r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, 
{r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" 
(c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -1435,108 +3409,514 @@ static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "sub sp, sp, #32\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + 
"lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, 
#0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" 
+ "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, 
r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, 
#0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" 
"mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -1544,66 +3924,294 @@ static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" 
+ "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], 
#20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds 
r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -1611,59 +4219,256 @@ static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[6]\n\t" + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" 
+ "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, 
r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #32]\n\t" - "# A[2] * A[7]\n\t" + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[6]\n\t" + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, 
r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -1671,97 +4476,424 @@ static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #36]\n\t" - "# A[3] * A[7]\n\t" + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" 
+ "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], 
#20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #40]\n\t" - "# A[4] * A[7]\n\t" + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" 
+ "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #44]\n\t" - "# A[5] * A[7]\n\t" + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" 
+ "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #48]\n\t" - "# A[6] * A[7]\n\t" + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr 
r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #52]\n\t" - "# A[7] * A[7]\n\t" + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adc r2, r2, r9\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r2, [%[r], #60]\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" "stm %[r]!, {r2, r3, r4, r8}\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #32\n\t" + : 
[r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); } @@ -1771,33 +4903,29 @@ static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } /* Square a and put result in r. (r = a * a) @@ -1842,47 +4970,43 @@ SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } /* Square a and put result in r. 
(r = a * a) @@ -1927,75 +5051,71 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, 
r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } /* Square a and put result in r. (r = a * a) @@ -2042,15 +5162,14 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #256\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x100\n\t" + "\n" + "L_sp_2048_add_64_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -2059,15 +5178,15 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_2048_add_64_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -2079,29 +5198,29 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, */ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #256\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x100\n\t" + "\n" + "L_sp_2048_sub_in_pkace_64_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), 
[a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_2048_sub_in_pkace_64_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -2115,54 +5234,81 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #512\n\t" + "sub sp, sp, #0x200\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #252\n\t" + "\n" + "L_sp_2048_mul_64_outer_%=: \n\t" + "subs r3, r5, #0xfc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_2048_mul_64_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, 
r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #256\n\t" - "beq 3f\n\t" + "cmp r3, #0x100\n\t" + "beq L_sp_2048_mul_64_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_2048_mul_64_inner_%=\n\t" + "\n" + "L_sp_2048_mul_64_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #504\n\t" - "ble 1b\n\t" + "cmp r5, #0x1f8\n\t" + "ble L_sp_2048_mul_64_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_2048_mul_64_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_2048_mul_64_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -2174,77 +5320,132 @@ static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #512\n\t" + "sub sp, sp, #0x200\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #252\n\t" + "\n" + "L_sp_2048_sqr_64_outer_%=: \n\t" + "subs r3, r5, #0xfc\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_2048_sqr_64_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_2048_sqr_64_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_2048_sqr_64_op_done_%=\n\t" + "\n" + "L_sp_2048_sqr_64_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + 
"adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_2048_sqr_64_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #256\n\t" - "beq 3f\n\t" + "cmp r3, #0x100\n\t" + "beq L_sp_2048_sqr_64_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_2048_sqr_64_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_2048_sqr_64_inner_%=\n\t" + "\n" + "L_sp_2048_sqr_64_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #504\n\t" - "ble 1b\n\t" + "cmp r5, #0x1f8\n\t" + "ble L_sp_2048_sqr_64_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_2048_sqr_64_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_2048_sqr_64_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -2274,15 +5475,14 @@ static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #128\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x80\n\t" + "\n" + "L_sp_2048_add_32_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -2291,15 +5491,15 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_2048_add_32_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -2311,29 +5511,29 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, */ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #128\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x80\n\t" + "\n" + "L_sp_2048_sub_in_pkace_32_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), 
[a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_2048_sub_in_pkace_32_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -2347,54 +5547,81 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" + "sub sp, sp, #0x100\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "\n" + "L_sp_2048_mul_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_2048_mul_32_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, 
r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" + "cmp r3, #0x80\n\t" + "beq L_sp_2048_mul_32_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_2048_mul_32_inner_%=\n\t" + "\n" + "L_sp_2048_mul_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf8\n\t" + "ble L_sp_2048_mul_32_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_2048_mul_32_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_2048_mul_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -2406,77 +5633,132 @@ static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" + "sub sp, sp, #0x100\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "\n" + "L_sp_2048_sqr_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_2048_sqr_32_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_2048_sqr_32_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_2048_sqr_32_op_done_%=\n\t" + "\n" + "L_sp_2048_sqr_32_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + 
"adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_2048_sqr_32_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" + "cmp r3, #0x80\n\t" + "beq L_sp_2048_sqr_32_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_2048_sqr_32_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_2048_sqr_32_inner_%=\n\t" + "\n" + "L_sp_2048_sqr_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf8\n\t" + "ble L_sp_2048_sqr_32_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_2048_sqr_32_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_2048_sqr_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -2503,499 +5785,2662 @@ static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho) *rho = (sp_digit)0 - x; } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_2048_mul_d_64_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" 
"add r9, r9, #4\n\t" - "cmp r9, #256\n\t" - "blt 1b\n\t" + "cmp r9, #0x100\n\t" + "blt L_sp_2048_mul_d_64_word_%=\n\t" "str r3, [%[r], #256]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[12] * B\n\t" + /* A[12] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[13] * B\n\t" + /* A[13] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[14] * B\n\t" + /* A[14] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[15] * B\n\t" + /* A[15] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[16] * B\n\t" + /* A[16] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[17] * B\n\t" + /* A[17] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" 
+#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[18] * B\n\t" + /* A[18] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[19] * B\n\t" + /* A[19] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull 
r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[20] * B\n\t" + /* A[20] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[21] * B\n\t" + /* A[21] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], 
r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[22] * B\n\t" + /* A[22] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[23] * B\n\t" + /* A[23] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds 
r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[24] * B\n\t" + /* A[24] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[25] * B\n\t" + /* A[25] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, 
r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[26] * B\n\t" + /* A[26] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[27] * B\n\t" + /* A[27] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs 
r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[28] * B\n\t" + /* A[28] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[29] * B\n\t" + /* A[29] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, 
r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[30] * B\n\t" + /* A[30] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[31] * B\n\t" + /* A[31] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - 
"adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[32] * B\n\t" + /* A[32] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[33] * B\n\t" + /* A[33] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, 
r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[34] * B\n\t" + /* A[34] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[35] * B\n\t" + /* A[35] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + 
"mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[36] * B\n\t" + /* A[36] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[37] * B\n\t" + /* A[37] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, 
#0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[38] * B\n\t" + /* A[38] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[39] * B\n\t" + /* A[39] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + 
"adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[40] * B\n\t" + /* A[40] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[41] * B\n\t" + /* A[41] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, 
#0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[42] * B\n\t" + /* A[42] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[43] * B\n\t" + /* A[43] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" 
+#endif "str r4, [%[r]], #4\n\t" - "# A[44] * B\n\t" + /* A[44] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[45] * B\n\t" + /* A[45] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str 
r3, [%[r]], #4\n\t" - "# A[46] * B\n\t" + /* A[46] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[47] * B\n\t" + /* A[47] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], 
#4\n\t" - "# A[48] * B\n\t" + /* A[48] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[49] * B\n\t" + /* A[49] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# 
A[50] * B\n\t" + /* A[50] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[51] * B\n\t" + /* A[51] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[52] * 
B\n\t" + /* A[52] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[53] * B\n\t" + /* A[53] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[54] * B\n\t" + /* 
A[54] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[55] * B\n\t" + /* A[55] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[56] * B\n\t" + /* A[56] * B */ 
"ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[57] * B\n\t" + /* A[57] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[58] * B\n\t" + /* A[58] * B */ "ldr r8, 
[%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[59] * B\n\t" + /* A[59] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[60] * B\n\t" + /* A[60] * B */ "ldr r8, [%[a]], 
#4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[61] * B\n\t" + /* A[61] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[62] * B\n\t" + /* A[62] * B */ "ldr r8, [%[a]], #4\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[63] * B\n\t" + /* A[63] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adc r4, r4, r7\n\t" +#endif "str r3, [%[r]], #4\n\t" "str r4, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* 
WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 2048 bits, just need to subtract. @@ -3011,6 +8456,7 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) sp_2048_sub_in_place_32(r, m); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -3019,34 +8465,45 @@ static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_2048_cond_sub_32_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x80\n\t" + "blt L_sp_2048_cond_sub_32_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. 
+ * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -3159,336 +8616,1181 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_2048_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" 
+ "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds 
r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, 
r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" 
+ /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, 
r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc 
r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" 
+ "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr 
r10, [%[a], #112]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" + /* a[i+31] += m[31] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 
4)) "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" +#else + "ldr r11, [%[m], #124]\n\t" +#endif + "ldr r10, [%[a], #124]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" - "ldr r9, [%[a], #128]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #128]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "ldr r10, [%[a], #128]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #128\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x80\n\t" + "blt L_sp_2048_mont_reduce_32_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", 
"r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); + sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -3521,275 +9823,1383 @@ SP_NOINLINE static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a, sp_2048_mont_reduce_32(r, m, mp); } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ -static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_2048_mul_d_32_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #128\n\t" - "blt 1b\n\t" + "cmp r9, #0x80\n\t" + "blt L_sp_2048_mul_d_32_word_%=\n\t" "str r3, [%[r], #128]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul 
r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[12] * B\n\t" + /* A[12] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[13] * B\n\t" + /* A[13] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[14] * B\n\t" + /* A[14] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[15] * B\n\t" + /* A[15] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[16] * B\n\t" + /* A[16] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[17] * B\n\t" + /* A[17] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[18] * B\n\t" + /* A[18] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[19] * B\n\t" + /* A[19] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, 
#0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[20] * B\n\t" + /* A[20] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[21] * B\n\t" + /* A[21] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + 
"mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[22] * B\n\t" + /* A[22] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[23] * B\n\t" + /* A[23] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, 
#0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[24] * B\n\t" + /* A[24] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[25] * B\n\t" + /* A[25] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + 
"adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[26] * B\n\t" + /* A[26] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[27] * B\n\t" + /* A[27] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, 
#0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[28] * B\n\t" + /* A[28] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[29] * B\n\t" + /* A[29] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[30] * B\n\t" + /* A[30] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[31] * B\n\t" + /* A[31] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" +#endif "str r4, [%[r]], #4\n\t" "str r5, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -3801,57 +11211,184 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, */ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], 
r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_2048_word_32_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_2048_word_32_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, 
r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Compare a with b in constant time. * * a A single precision integer. 
@@ -3861,398 +11398,390 @@ static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, sp_digit div) */ static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( /* Walks the words from byte offset 124 down to 0. Every word is loaded and compared; once a differing word is seen the mask in r3 becomes 0 so all later words compare as equal. Final value: 0 if every word matched, 1 if the word of a was higher at the first difference, -1 if it was lower. */ + "mov r2, #-1\n\t" /* r2 = result accumulator, starts at -1 */ + "mov r6, #1\n\t" /* r6 = constant 1, stored when the word of a is higher */ + "mov r5, #0\n\t" /* r5 = constant 0, used to clear the mask */ + "mov r3, #-1\n\t" /* r3 = mask, all ones until the first differing word */ #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #124\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0x7c\n\t" /* r4 = byte offset of the top word (124) */ + "\n" + "L_sp_2048_cmp_32_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" /* step down one word; the borrow after offset 0 ends the loop */ + "bcs L_sp_2048_cmp_32_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" /* the same masked compare is repeated below for offsets 120 down to 0 */ + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, 
r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" 
+ "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, 
[%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, 
r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], 
#44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - 
"and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - 
"movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" /* fold the mask in: -1 ^ -1 gives 0 when every word matched, otherwise r3 is 0 and r2 stays 1 or -1 */ +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" /* the result is handed back through the a operand */ + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -4651,6 +12180,7 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) } #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -4659,34 +12189,45 @@ static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. 
*/ -static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #256\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" /* r6 = constant 0 */ + "mov r12, #0\n\t" /* r12 = borrow carried between iterations, 0 or -1 */ + "mov lr, #0\n\t" /* lr = byte offset used for a, b and r */ + "\n" + "L_sp_2048_cond_sub_64_words_%=: \n\t" + "subs r12, r6, r12\n\t" /* 0 - r12 reloads the carry flag from the saved borrow */ + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" /* the word of b is kept or zeroed by the mask */ + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" /* save the new borrow as 0 or -1 before cmp overwrites the flags */ + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x100\n\t" /* 0x100 bytes = 64 words of 4 bytes */ + "blt L_sp_2048_cond_sub_64_words_%=\n\t" + "mov %[r], r12\n\t" /* the final borrow is returned through the r operand */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" /* lr = constant 0, consumed by the closing sbc */ "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -4911,624 +12452,2301 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" /* r = 0 - 0 - borrow: 0 when the last sbcs did not borrow, -1 when it did; NOTE(review): the middle of this body is elided by the hunk boundary above, confirm against the full file */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ /* Reduce the number back to 2048 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_2048_mont_reduce_64_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" 
+ "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds 
r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, 
r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" 
+ /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, 
r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc 
r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" 
+ "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr 
r10, [%[a], #112]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str 
r10, [%[a], #124]\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + 
"umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + 
"lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, 
[%[a], #164]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - "ldr r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, 
#0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# 
a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, 
r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, [%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], #184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, 
#16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" - "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" "adc r4, r4, #0\n\t" - "# a[i+48] += m[48] * mu\n\t" - "ldr r7, [%[m], #192]\n\t" - "ldr r9, [%[a], #192]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #192]\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, 
[%[a], #208]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+49] += m[49] * mu\n\t" - "ldr r7, [%[m], #196]\n\t" - "ldr r9, [%[a], #196]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #196]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" "adc r4, r4, #0\n\t" - "# a[i+50] += m[50] * mu\n\t" - "ldr r7, [%[m], #200]\n\t" - "ldr r9, [%[a], #200]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #200]\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+51] += m[51] * mu\n\t" - "ldr r7, [%[m], #204]\n\t" - "ldr r9, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #204]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str 
r10, [%[a], #220]\n\t" "adc r4, r4, #0\n\t" - "# a[i+52] += m[52] * mu\n\t" - "ldr r7, [%[m], #208]\n\t" - "ldr r9, [%[a], #208]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #208]\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+53] += m[53] * mu\n\t" - "ldr r7, [%[m], #212]\n\t" - "ldr r9, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + 
"umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #212]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" "adc r4, r4, #0\n\t" - "# a[i+54] += m[54] * mu\n\t" - "ldr r7, [%[m], #216]\n\t" - "ldr r9, [%[a], #216]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #216]\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+55] += m[55] * mu\n\t" - "ldr r7, [%[m], #220]\n\t" - "ldr r9, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #220]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" "adc r4, r4, #0\n\t" - "# a[i+56] += m[56] * mu\n\t" - "ldr r7, [%[m], #224]\n\t" - "ldr r9, [%[a], #224]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #224]\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+57] += m[57] * mu\n\t" - "ldr r7, [%[m], #228]\n\t" - "ldr r9, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + 
"lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #228]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" "adc r4, r4, #0\n\t" - "# a[i+58] += m[58] * mu\n\t" - "ldr r7, [%[m], #232]\n\t" - "ldr r9, [%[a], #232]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #232]\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+59] += m[59] * mu\n\t" - "ldr r7, [%[m], #236]\n\t" - "ldr r9, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #236]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+60] += m[60] * mu\n\t" - "ldr r7, [%[m], #240]\n\t" - "ldr r9, [%[a], #240]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #240]\n\t" +#endif + "adds r10, r10, 
r4\n\t" + "str r10, [%[a], #248]\n\t" "adc r5, r5, #0\n\t" - "# a[i+61] += m[61] * mu\n\t" - "ldr r7, [%[m], #244]\n\t" - "ldr r9, [%[a], #244]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #244]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+62] += m[62] * mu\n\t" - "ldr r7, [%[m], #248]\n\t" - "ldr r9, [%[a], #248]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #248]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+63] += m[63] * mu\n\t" + /* a[i+63] += m[63] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r7, [%[m], #252]\n\t" - "ldr r9, [%[a], #252]\n\t" +#else + "ldr r11, [%[m], #252]\n\t" +#endif + "ldr r10, [%[a], #252]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #252]\n\t" - "ldr r9, [%[a], #256]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #256]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str 
r10, [%[a], #252]\n\t" + "ldr r10, [%[a], #256]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #256\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x100\n\t" + "blt L_sp_2048_mont_reduce_64_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); + sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -5568,31 +14786,30 @@ SP_NOINLINE static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #256\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x100\n\t" + "\n" + "L_sp_2048_sub_64_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_2048_sub_64_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -5602,134 +14819,131 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, 
r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" 
"sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } 
#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -5741,57 +14955,184 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, */ static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], 
r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_2048_word_64_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_2048_word_64_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return 
(uint32_t)(size_t)d1; +} + +#endif /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -5904,750 +15245,742 @@ static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m) */ static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #252\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0xfc\n\t" + "\n" + "L_sp_2048_cmp_64_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_2048_cmp_64_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #252]\n\t" - "ldr r5, [%[b], #252]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #252]\n\t" + "ldr lr, [%[b], #252]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[b], #248]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #248]\n\t" + "ldr lr, 
[%[b], #248]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[b], #244]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #244]\n\t" + "ldr lr, [%[b], #244]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[b], #240]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #240]\n\t" + "ldr lr, [%[b], #240]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #236]\n\t" - "ldr r5, [%[b], #236]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #236]\n\t" + "ldr lr, [%[b], #236]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[b], #232]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #232]\n\t" + "ldr lr, [%[b], #232]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #228]\n\t" - 
"ldr r5, [%[b], #228]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #228]\n\t" + "ldr lr, [%[b], #228]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[b], #224]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #224]\n\t" + "ldr lr, [%[b], #224]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #220]\n\t" - "ldr r5, [%[b], #220]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #220]\n\t" + "ldr lr, [%[b], #220]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[b], #216]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #216]\n\t" + "ldr lr, [%[b], #216]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[b], #212]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #212]\n\t" + "ldr lr, [%[b], #212]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi 
%[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[b], #208]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #208]\n\t" + "ldr lr, [%[b], #208]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #204]\n\t" - "ldr r5, [%[b], #204]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #204]\n\t" + "ldr lr, [%[b], #204]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[b], #200]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #200]\n\t" + "ldr lr, [%[b], #200]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[b], #196]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #196]\n\t" + "ldr lr, [%[b], #196]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[b], #192]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, 
r5\n\t" + "ldr r12, [%[a], #192]\n\t" + "ldr lr, [%[b], #192]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + "ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - 
"movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - "ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" + "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" 
+ "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, 
r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], 
r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + 
"and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, 
r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo 
%[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, 
r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and 
r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo 
r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -7178,35 +16511,33 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" + "mov lr, #0\n\t" "mov r6, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_2048_cond_add_32_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r7, r7\n\t" - "str r4, [%[r], r6]\n\t" - "add r6, r6, #4\n\t" - "cmp r6, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0x80\n\t" + "blt L_sp_2048_cond_add_32_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifndef WOLFSSL_SP_SMALL +#else /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -7215,328 +16546,131 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r8, #0\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" -#else - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" -#endif + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" -#else - "strd r4, r5, [%[r], #0]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" -#else - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" -#else - "strd r4, r5, [%[r], #8]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" -#else - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" -#else - "strd r4, r5, [%[r], 
#16]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" -#else - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #28]\n\t" -#else - "strd r4, r5, [%[r], #24]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" -#else - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" -#else - "strd r4, r5, [%[r], #32]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" -#else - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" -#else - "strd r4, r5, [%[r], #40]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" -#else - "ldrd 
r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" -#else - "strd r4, r5, [%[r], #48]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" -#else - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #56]\n\t" - "str r5, [%[r], #60]\n\t" -#else - "strd r4, r5, [%[r], #56]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" -#else - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" -#else - "strd r4, r5, [%[r], #64]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" -#else - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, 
r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #72]\n\t" - "str r5, [%[r], #76]\n\t" -#else - "strd r4, r5, [%[r], #72]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" -#else - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" -#else - "strd r4, r5, [%[r], #80]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" -#else - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #88]\n\t" - "str r5, [%[r], #92]\n\t" -#else - "strd r4, r5, [%[r], #88]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" -#else - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" -#else - "strd r4, r5, [%[r], #96]\n\t" -#endif -#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" -#else - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #104]\n\t" - "str r5, [%[r], #108]\n\t" -#else - "strd r4, r5, [%[r], #104]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" -#else - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" -#else - "strd r4, r5, [%[r], #112]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" -#else - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #120]\n\t" - "str r5, [%[r], #124]\n\t" -#else - "strd r4, r5, [%[r], #120]\n\t" -#endif - "adc %[c], r8, r8\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, 
r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* !WOLFSSL_SP_SMALL */ +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -7854,394 +16988,394 @@ int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod, static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "rsb r6, %[n], #31\n\t" - "ldr r3, [%[a], #252]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #248]\n\t" - "str r4, [%[r], #256]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #244]\n\t" - "str r3, [%[r], #252]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #252]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #248]\n\t" + "str r6, [%[r], #256]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #240]\n\t" - "str r2, [%[r], #248]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #236]\n\t" - "str r4, [%[r], #244]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #232]\n\t" - "str r3, [%[r], #240]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #244]\n\t" + "str r5, [%[r], #252]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #240]\n\t" + "str r4, [%[r], #248]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #236]\n\t" + "str r6, [%[r], #244]\n\t" + "lsr r3, r4, #1\n\t" "lsl 
r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #228]\n\t" - "str r2, [%[r], #236]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #224]\n\t" - "str r4, [%[r], #232]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #220]\n\t" - "str r3, [%[r], #228]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #232]\n\t" + "str r5, [%[r], #240]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #228]\n\t" + "str r4, [%[r], #236]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #224]\n\t" + "str r6, [%[r], #232]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #216]\n\t" - "str r2, [%[r], #224]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #212]\n\t" - "str r4, [%[r], #220]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #208]\n\t" - "str r3, [%[r], #216]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #220]\n\t" + "str r5, [%[r], #228]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #216]\n\t" + "str r4, [%[r], #224]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #212]\n\t" + "str r6, [%[r], #220]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #204]\n\t" - "str r2, [%[r], #212]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr 
r2, [%[a], #200]\n\t" - "str r4, [%[r], #208]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #196]\n\t" - "str r3, [%[r], #204]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #208]\n\t" + "str r5, [%[r], #216]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #204]\n\t" + "str r4, [%[r], #212]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #200]\n\t" + "str r6, [%[r], #208]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #192]\n\t" - "str r2, [%[r], #200]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #188]\n\t" - "str r4, [%[r], #196]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #184]\n\t" - "str r3, [%[r], #192]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #196]\n\t" + "str r5, [%[r], #204]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #192]\n\t" + "str r4, [%[r], #200]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #188]\n\t" + "str r6, [%[r], #196]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #180]\n\t" - "str r2, [%[r], #188]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #176]\n\t" - "str r4, [%[r], #184]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #172]\n\t" - "str r3, [%[r], #180]\n\t" - "lsr r5, r4, 
#1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #184]\n\t" + "str r5, [%[r], #192]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #180]\n\t" + "str r4, [%[r], #188]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #176]\n\t" + "str r6, [%[r], #184]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #168]\n\t" - "str r2, [%[r], #176]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #164]\n\t" - "str r4, [%[r], #172]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #160]\n\t" - "str r3, [%[r], #168]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #172]\n\t" + "str r5, [%[r], #180]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #168]\n\t" + "str r4, [%[r], #176]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #164]\n\t" + "str r6, [%[r], #172]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #156]\n\t" - "str r2, [%[r], #164]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #152]\n\t" - "str r4, [%[r], #160]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #148]\n\t" - "str r3, [%[r], #156]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #160]\n\t" + "str r5, [%[r], #168]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, 
[%[a], #156]\n\t" + "str r4, [%[r], #164]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #152]\n\t" + "str r6, [%[r], #160]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #144]\n\t" - "str r2, [%[r], #152]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #140]\n\t" - "str r4, [%[r], #148]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #136]\n\t" - "str r3, [%[r], #144]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #148]\n\t" + "str r5, [%[r], #156]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #144]\n\t" + "str r4, [%[r], #152]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #140]\n\t" + "str r6, [%[r], #148]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #132]\n\t" - "str r2, [%[r], #140]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #128]\n\t" - "str r4, [%[r], #136]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #124]\n\t" - "str r3, [%[r], #132]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #136]\n\t" + "str r5, [%[r], #144]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #132]\n\t" + "str r4, [%[r], #140]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #128]\n\t" + "str r6, [%[r], #136]\n\t" + "lsr r3, r4, #1\n\t" 
"lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #120]\n\t" - "str r2, [%[r], #128]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #116]\n\t" - "str r4, [%[r], #124]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #112]\n\t" - "str r3, [%[r], #120]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #124]\n\t" + "str r5, [%[r], #132]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #120]\n\t" + "str r4, [%[r], #128]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #116]\n\t" + "str r6, [%[r], #124]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #108]\n\t" - "str r2, [%[r], #116]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #104]\n\t" - "str r4, [%[r], #112]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #100]\n\t" - "str r3, [%[r], #108]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #112]\n\t" + "str r5, [%[r], #120]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #108]\n\t" + "str r4, [%[r], #116]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #104]\n\t" + "str r6, [%[r], #112]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #96]\n\t" - "str r2, [%[r], #104]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - 
"ldr r2, [%[a], #92]\n\t" - "str r4, [%[r], #100]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #88]\n\t" - "str r3, [%[r], #96]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #100]\n\t" + "str r5, [%[r], #108]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #96]\n\t" + "str r4, [%[r], #104]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #92]\n\t" + "str r6, [%[r], #100]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #84]\n\t" - "str r2, [%[r], #92]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #80]\n\t" - "str r4, [%[r], #88]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #76]\n\t" - "str r3, [%[r], #84]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #88]\n\t" + "str r5, [%[r], #96]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #84]\n\t" + "str r4, [%[r], #92]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #80]\n\t" + "str r6, [%[r], #88]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #72]\n\t" - "str r2, [%[r], #80]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #68]\n\t" - "str r4, [%[r], #76]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #64]\n\t" - "str r3, [%[r], #72]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, 
r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #76]\n\t" + "str r5, [%[r], #84]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #72]\n\t" + "str r4, [%[r], #80]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #68]\n\t" + "str r6, [%[r], #76]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #60]\n\t" - "str r2, [%[r], #68]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #56]\n\t" - "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #52]\n\t" - "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #64]\n\t" + "str r5, [%[r], #72]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r6, [%[r], #64]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" - "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #52]\n\t" + "str r5, [%[r], #60]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr 
r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r6, [%[r], #52]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" - "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #40]\n\t" + "str r5, [%[r], #48]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r6, [%[r], #40]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" - "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #28]\n\t" + "str r5, [%[r], #36]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r6, [%[r], #28]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, 
[%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" - "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #4]\n\t" - "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #16]\n\t" + "str r5, [%[r], #24]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r6, [%[r], #16]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #4]\n\t" + "str r5, [%[r], #12]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a]]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "str r5, [%[r]]\n\t" + "str r6, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12" ); } @@ -8663,980 +17797,5476 @@ static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "sub sp, sp, #48\n\t" "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr 
r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl 
r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else 
"umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs 
r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], 
#16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) 
&& (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds 
r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr 
r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" 
+ "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# A[8] * B[0]\n\t" + /* A[8] * B[0] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds 
r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * 
B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[8]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + 
"ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #32]\n\t" - "# A[0] * B[9]\n\t" + /* A[0] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #32]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, 
r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc 
r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[1] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #36]\n\t" - "# A[10] * B[0]\n\t" + /* A[10] * B[0] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[3] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, 
r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc 
r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" 
- "adc r3, r3, r10\n\t" - "# A[0] * B[10]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #40]\n\t" - "# A[0] * B[11]\n\t" + /* A[0] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * 
B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ "ldr r8, 
[%[a], #12]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, 
r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #44]\n\t" - "# A[11] * B[1]\n\t" + /* A[11] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[10] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[5] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, 
r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #48]\n\t" - "# A[2] * B[11]\n\t" + /* A[2] * B[11] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, 
r10\n\t" - "# A[3] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* 
A[5] * B[8] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, 
r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[5] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[3] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #52]\n\t" - "# A[11] * B[3]\n\t" + /* A[11] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, 
r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ "ldr r11, [%[a], #28]\n\t" "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[8] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[11] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #56]\n\t" - "# A[4] * B[11]\n\t" + /* A[4] * B[11] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * 
B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[6] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ "ldr r8, [%[a], #44]\n\t" 
"ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #60]\n\t" - "# A[11] * B[5]\n\t" + /* A[11] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[10] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #24]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ "ldr r11, [%[a], #32]\n\t" "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, 
r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #64]\n\t" - "# A[6] * B[11]\n\t" + /* A[6] * B[11] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr 
r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #68]\n\t" - "# A[11] * B[7]\n\t" + /* A[11] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, 
r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[10] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ "ldr r11, [%[a], #36]\n\t" "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" 
+ "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #72]\n\t" - "# A[8] * B[11]\n\t" + /* A[8] * B[11] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" 
"adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, 
[%[r], #76]\n\t" - "# A[11] * B[9]\n\t" + /* A[11] * B[9] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ "ldr r11, [%[a], #40]\n\t" "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ "ldr r8, [%[a], 
#36]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #80]\n\t" - "# A[10] * B[11]\n\t" + /* A[10] * B[11] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[11] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH 
< 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #84]\n\t" - "# A[11] * B[11]\n\t" + /* A[11] * B[11] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" @@ -9645,9 +23275,8 @@ static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #48\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : 
"memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -9658,41 +23287,37 @@ static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" /* cc: adds/adcs set the flags */ ); - - return c; + return (uint32_t)(size_t)r; } /* Sub b from a into a.
(a -= b) @@ -9702,58 +23327,55 @@ static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, */ static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" 
+ "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" /* cc: subs/sbcs set the flags */ ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -9762,62 +23384,58 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, 
r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" /* cc: adds/adcs set the flags */ ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r.
@@ -9896,100 +23514,97 @@ SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a, */ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" 
+ "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, 
r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" /* cc: subs/sbcs set the flags */ ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -9998,104 +23613,100 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer.
*/ -static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" 
"adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, 
r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "cc" /* cc: adds/adcs set the flags */ ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -10174,184 +23785,181 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, */ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, 
r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, 
r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, 
r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm 
%[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc" /* cc: subs/sbcs set the flags */ ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r.
(r = a + b) @@ -10360,188 +23968,184 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, 
r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm 
%[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, 
r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, 
{r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. @@ -10622,108 +24226,514 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "sub sp, sp, #48\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, 
#0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + 
"lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" 
+#endif "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, 
r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, 
#16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + 
"lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -10731,66 +24741,294 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, 
r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + 
"adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc 
r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + 
"adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -10798,80 +25036,370 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[0] * A[8]\n\t" + /* A[0] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, 
#16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] 
* A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #32]\n\t" - "# A[0] * A[9]\n\t" + /* A[0] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[8]\n\t" + /* A[1] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl 
r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" 
+ "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -10879,94 +25407,446 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #36]\n\t" - "# A[0] * A[10]\n\t" + /* A[0] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[9]\n\t" + /* A[1] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[6]\n\t" + "adc 
r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, 
r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #40]\n\t" - "# A[0] * A[11]\n\t" + /* A[0] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[10]\n\t" + /* A[1] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, 
r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -10974,87 +25854,408 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #44]\n\t" - "# A[1] * A[11]\n\t" + /* A[1] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[10]\n\t" + /* A[2] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[9]\n\t" + "adc 
r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" 
- "adc r7, r7, r12\n\t" - "# A[5] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, 
r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #48]\n\t" - "# A[2] * A[11]\n\t" + /* A[2] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[10]\n\t" + /* A[3] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + 
"adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, 
#16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -11062,73 +26263,332 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #52]\n\t" - "# A[3] * A[11]\n\t" + /* A[3] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[4] * A[10]\n\t" + /* A[4] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, 
r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr 
r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #56]\n\t" - "# A[4] * A[11]\n\t" + /* A[4] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[5] * A[10]\n\t" + /* A[5] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, 
r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -11136,59 +26596,256 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #60]\n\t" - "# A[5] * A[11]\n\t" + /* A[5] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[6] * A[10]\n\t" + /* A[6] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc 
r7, r7, r12\n\t" - "# A[7] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, 
#17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #64]\n\t" - "# A[6] * A[11]\n\t" + /* A[6] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[7] * A[10]\n\t" + /* A[7] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds 
r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -11196,87 +26853,415 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #68]\n\t" - "# A[7] * A[11]\n\t" + /* A[7] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, 
#0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[8] * A[10]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[9] * A[9]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, 
r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * A[9] */ "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #72]\n\t" - "# A[8] * A[11]\n\t" + /* A[8] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[9] * A[10]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, 
r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[9] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #76]\n\t" - "# A[9] * A[11]\n\t" 
+ /* A[9] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[10] * A[10]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * A[10] */ "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" 
+#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #80]\n\t" - "# A[10] * A[11]\n\t" + /* A[10] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #84]\n\t" - "# A[11] * A[11]\n\t" + /* A[11] * A[11] */ "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" + "lsr r9, r10, #16\n\t" + 
"lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adc r4, r4, r9\n\t" +#endif "str r3, [%[r], #88]\n\t" "str r4, [%[r], #92]\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" @@ -11285,10 +27270,9 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "stm %[r]!, {r2, r3, r4, r8}\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #48\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); } @@ -11298,40 +27282,36 @@ static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], 
#0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } /* Square a and put result in r. (r = a * a) @@ -11376,61 +27356,57 @@ SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, 
r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } /* Square a and put result in r. (r = a * a) @@ -11475,103 +27451,99 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, 
r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" 
"sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } /* Square a and put result in r. (r = a * a) @@ -11618,15 +27590,14 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #384\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x180\n\t" + "\n" + "L_sp_3072_add_96_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -11635,15 +27606,15 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_3072_add_96_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* 
WOLFSSL_SP_SMALL */ @@ -11655,29 +27626,29 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, */ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #384\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x180\n\t" + "\n" + "L_sp_3072_sub_in_pkace_96_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_3072_sub_in_pkace_96_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -11691,54 +27662,81 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #768\n\t" + "sub sp, sp, #0x300\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #380\n\t" + "\n" + "L_sp_3072_mul_96_outer_%=: \n\t" + "subs r3, r5, #0x17c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_3072_mul_96_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" 
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #384\n\t" - "beq 3f\n\t" + "cmp r3, #0x180\n\t" + "beq L_sp_3072_mul_96_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_3072_mul_96_inner_%=\n\t" + "\n" + "L_sp_3072_mul_96_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #760\n\t" - "ble 1b\n\t" + "cmp r5, #0x2f8\n\t" + "ble L_sp_3072_mul_96_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_3072_mul_96_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_3072_mul_96_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : 
"memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -11750,77 +27748,132 @@ static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #768\n\t" + "sub sp, sp, #0x300\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #380\n\t" + "\n" + "L_sp_3072_sqr_96_outer_%=: \n\t" + "subs r3, r5, #0x17c\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_3072_sqr_96_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_3072_sqr_96_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, 
r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_3072_sqr_96_op_done_%=\n\t" + "\n" + "L_sp_3072_sqr_96_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_3072_sqr_96_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #384\n\t" - "beq 3f\n\t" + "cmp r3, #0x180\n\t" + "beq L_sp_3072_sqr_96_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_3072_sqr_96_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_3072_sqr_96_inner_%=\n\t" + "\n" + "L_sp_3072_sqr_96_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #760\n\t" - "ble 1b\n\t" + "cmp r5, #0x2f8\n\t" + "ble L_sp_3072_sqr_96_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, 
#16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_3072_sqr_96_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_3072_sqr_96_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -11850,15 +27903,14 @@ static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #192\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0xc0\n\t" + "\n" + "L_sp_3072_add_48_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -11867,15 +27919,15 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_3072_add_48_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -11887,29 +27939,29 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, */ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, 
#0\n\t" - "add r12, %[a], #192\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0xc0\n\t" + "\n" + "L_sp_3072_sub_in_pkace_48_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_3072_sub_in_pkace_48_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -11923,54 +27975,81 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #384\n\t" + "sub sp, sp, #0x180\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #188\n\t" + "\n" + "L_sp_3072_mul_48_outer_%=: \n\t" + "subs r3, r5, #0xbc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_3072_mul_48_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + 
"adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #192\n\t" - "beq 3f\n\t" + "cmp r3, #0xc0\n\t" + "beq L_sp_3072_mul_48_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_3072_mul_48_inner_%=\n\t" + "\n" + "L_sp_3072_mul_48_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #376\n\t" - "ble 1b\n\t" + "cmp r5, #0x178\n\t" + "ble L_sp_3072_mul_48_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_3072_mul_48_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_3072_mul_48_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -11982,77 +28061,132 @@ static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) { 
__asm__ __volatile__ ( - "sub sp, sp, #384\n\t" + "sub sp, sp, #0x180\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #188\n\t" + "\n" + "L_sp_3072_sqr_48_outer_%=: \n\t" + "subs r3, r5, #0xbc\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_3072_sqr_48_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_3072_sqr_48_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_3072_sqr_48_op_done_%=\n\t" + 
"\n" + "L_sp_3072_sqr_48_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_3072_sqr_48_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #192\n\t" - "beq 3f\n\t" + "cmp r3, #0xc0\n\t" + "beq L_sp_3072_sqr_48_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_3072_sqr_48_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_3072_sqr_48_inner_%=\n\t" + "\n" + "L_sp_3072_sqr_48_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #376\n\t" - "ble 1b\n\t" + "cmp r5, #0x178\n\t" + "ble L_sp_3072_sqr_48_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_3072_sqr_48_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", 
"r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_3072_sqr_48_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -12079,723 +28213,3942 @@ static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho) *rho = (sp_digit)0 - x; } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ -static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_3072_mul_d_96_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" 
+ "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #384\n\t" - "blt 1b\n\t" + "cmp r9, #0x180\n\t" + "blt L_sp_3072_mul_d_96_word_%=\n\t" "str r3, [%[r], #384]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul 
r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, 
r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs 
r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, 
#0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[12] * B\n\t" + /* A[12] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[13] * B\n\t" + /* A[13] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + 
"mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[14] * B\n\t" + /* A[14] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[15] * B\n\t" + /* A[15] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, 
#0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[16] * B\n\t" + /* A[16] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[17] * B\n\t" + /* A[17] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + 
"adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[18] * B\n\t" + /* A[18] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[19] * B\n\t" + /* A[19] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[20] * B\n\t" + /* A[20] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[21] * B\n\t" + /* A[21] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[22] * B\n\t" + /* A[22] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[23] * B\n\t" + /* A[23] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[24] * B\n\t" + /* A[24] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[25] * B\n\t" + /* A[25] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[26] * B\n\t" + /* A[26] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[27] * B\n\t" + /* A[27] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[28] * B\n\t" + /* A[28] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[29] * B\n\t" + /* A[29] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[30] * B\n\t" + /* A[30] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[31] * B\n\t" + /* A[31] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[32] * B\n\t" + /* A[32] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[33] * B\n\t" + /* A[33] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[34] * B\n\t" + /* A[34] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[35] * B\n\t" + /* A[35] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[36] * B\n\t" + /* A[36] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[37] * B\n\t" + /* A[37] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[38] * B\n\t" + /* A[38] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[39] * B\n\t" + /* A[39] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[40] * B\n\t" + /* A[40] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[41] * B\n\t" + /* A[41] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[42] * B\n\t" + /* A[42] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[43] * B\n\t" + /* A[43] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[44] * B\n\t" + /* A[44] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[45] * B\n\t" + /* A[45] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[46] * B\n\t" + /* A[46] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[47] * B\n\t" + /* A[47] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[48] * B\n\t" + /* A[48] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[49] * B\n\t" + /* A[49] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[50] * B\n\t" + /* A[50] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[51] * B\n\t" + /* A[51] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, 
%[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[52] * B\n\t" + /* A[52] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[53] * B\n\t" + /* A[53] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[54] * B\n\t" + /* A[54] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[55] * B\n\t" + /* A[55] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[56] * B\n\t" + /* A[56] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[57] * B\n\t" + /* A[57] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[58] * B\n\t" + /* A[58] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[59] * B\n\t" + /* A[59] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[60] * B\n\t" + /* A[60] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[61] * B\n\t" + /* A[61] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[62] * B\n\t" + /* A[62] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[63] * B\n\t" + /* A[63] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[64] * B\n\t" + /* A[64] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[65] * B\n\t" + /* A[65] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[66] * B\n\t" + /* A[66] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[67] * B\n\t" + /* A[67] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[68] * B\n\t" + /* A[68] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[69] * B\n\t" + /* A[69] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[70] * B\n\t" + /* A[70] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[71] * B\n\t" + /* A[71] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + 
"lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[72] * B\n\t" + /* A[72] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[73] * B\n\t" + /* A[73] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[74] * B\n\t" + /* A[74] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[75] * B\n\t" + /* A[75] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[76] * B\n\t" + /* A[76] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[77] * B\n\t" + /* A[77] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[78] * B\n\t" + /* A[78] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[79] * B\n\t" + /* A[79] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[80] * B\n\t" + /* A[80] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[81] * B\n\t" + /* A[81] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[82] * B\n\t" + /* A[82] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[83] * B\n\t" + /* A[83] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[84] * B\n\t" + /* A[84] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[85] * B\n\t" + /* A[85] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[86] * B\n\t" + /* A[86] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[87] * B\n\t" + /* A[87] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[88] * B\n\t" + /* A[88] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[89] * B\n\t" + /* A[89] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[90] * B\n\t" + /* A[90] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[91] * B\n\t" + /* A[91] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[92] * B\n\t" + /* A[92] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[93] * B\n\t" + /* A[93] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[94] * B\n\t" + /* A[94] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[95] * B\n\t" + /* A[95] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, 
r7\n\t" +#endif "str r5, [%[r]], #4\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 3072 bits, just need to subtract. @@ -12811,6 +32164,7 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) sp_3072_sub_in_place_48(r, m); } +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -12819,34 +32173,45 @@ static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #192\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_3072_cond_sub_48_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0xc0\n\t" + "blt 
L_sp_3072_cond_sub_48_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -13015,480 +32380,1741 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_3072_mont_reduce_48_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" 
+ "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds 
r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, 
r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" 
+ /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, 
r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc 
r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" 
+ "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr 
r10, [%[a], #112]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str 
r10, [%[a], #124]\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + 
"umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + 
"lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, 
[%[a], #164]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - "ldr r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, 
#0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# 
a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, [%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], #184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" + /* a[i+47] += m[47] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" +#else + "ldr r11, [%[m], #188]\n\t" +#endif + "ldr r10, [%[a], #188]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds 
r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" - "ldr r9, [%[a], #192]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #192]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" + "ldr r10, [%[a], #192]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #192\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0xc0\n\t" + "blt L_sp_3072_mont_reduce_48_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca); + sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -13521,387 +34147,2023 @@ SP_NOINLINE static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a, sp_3072_mont_reduce_48(r, m, mp); } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_3072_mul_d_48_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" 
"add r9, r9, #4\n\t" - "cmp r9, #192\n\t" - "blt 1b\n\t" + "cmp r9, #0xc0\n\t" + "blt L_sp_3072_mul_d_48_word_%=\n\t" "str r3, [%[r], #192]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[12] * B\n\t" + /* A[12] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[13] * B\n\t" + /* A[13] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[14] * B\n\t" + /* A[14] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[15] * B\n\t" + /* A[15] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[16] * B\n\t" + /* A[16] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[17] * B\n\t" + /* A[17] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" 
+#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[18] * B\n\t" + /* A[18] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[19] * B\n\t" + /* A[19] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull 
r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[20] * B\n\t" + /* A[20] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[21] * B\n\t" + /* A[21] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], 
r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[22] * B\n\t" + /* A[22] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[23] * B\n\t" + /* A[23] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds 
r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[24] * B\n\t" + /* A[24] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[25] * B\n\t" + /* A[25] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, 
r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[26] * B\n\t" + /* A[26] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[27] * B\n\t" + /* A[27] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs 
r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[28] * B\n\t" + /* A[28] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[29] * B\n\t" + /* A[29] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, 
r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[30] * B\n\t" + /* A[30] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[31] * B\n\t" + /* A[31] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - 
"adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[32] * B\n\t" + /* A[32] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[33] * B\n\t" + /* A[33] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, 
r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[34] * B\n\t" + /* A[34] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[35] * B\n\t" + /* A[35] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + 
"mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[36] * B\n\t" + /* A[36] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[37] * B\n\t" + /* A[37] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, 
#0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[38] * B\n\t" + /* A[38] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[39] * B\n\t" + /* A[39] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + 
"adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[40] * B\n\t" + /* A[40] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[41] * B\n\t" + /* A[41] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, 
#0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[42] * B\n\t" + /* A[42] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[43] * B\n\t" + /* A[43] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" 
+#endif "str r4, [%[r]], #4\n\t" - "# A[44] * B\n\t" + /* A[44] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[45] * B\n\t" + /* A[45] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str 
r3, [%[r]], #4\n\t" - "# A[46] * B\n\t" + /* A[46] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[47] * B\n\t" + /* A[47] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" +#endif "str r5, [%[r]], #4\n\t" "str r3, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + 
: + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -13913,57 +36175,184 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, */ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" /* lr = (div >> 16) + 1; divisor used by the first three udiv steps */ + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" /* r4 accumulates the quotient; the first estimate goes in its upper 16 bits */ + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" /* (d1|d0) -= div * r4 */ + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" /* result depends only on the carry flag, not on the previous value of r7 */ + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" /* when d1 >= lr: r8 = all-ones mask and r7 = 0x10000; otherwise both are 0 */ + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs 
%[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" /* add does not set flags, so the borrow from the subs above still feeds the sbc below */ + "sbc %[d1], %[d1], r6\n\t" /* branch-free: (d1|d0) -= (div << 16) and r4 += 0x10000, only when the mask is set */ + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" /* r3 = (d1 << 16) | (d0 >> 16) */ + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" /* (d1|d0) -= div * r3 */ + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" /* r3 = (d1 << 16) | (d0 >> 16) again, for the next estimate */ + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" /* only the low word is updated here */ + "udiv r3, %[d0], %[div]\n\t" /* last step divides d0 by the full divisor */ + "add %[d1], r4, r3\n\t" /* the quotient is handed back through the d1 operand */ + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" /* lr = (div >> 1) + 1 */ + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" /* (r5|r4) is a working copy of (d1|d0); r3 collects the quotient bits */ + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" /* r6 is all-ones when r5 > lr: r3 gets a 1 bit and lr is subtracted from r5, without branching */ + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_3072_word_48_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" /* shift (r5|r4) left by one bit */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_3072_word_48_bit_%=\n\t" /* body runs 30 times: r12 counts from 29 down to 0 */ + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" /* r3 = 2 * r3 + 1 */ /* r5:r4 = r3 * div; built from 16x16-bit mul steps when WOLFSSL_SP_ARM_ARCH < 4, otherwise a single umull */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" /* correction round 1 of 3: r3 += high word of ((d1|d0) - r3 * div) */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, 
r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" /* correction round 2 of 3, same computation with the updated r3 */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" /* correction round 3 of 3; r7 keeps the low word of the difference for the test below */ + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" /* adds 1 more when r7 > div; the quotient is handed back through the d1 operand */ + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Compare a with b in constant time. * * a A single precision integer. 
@@ -13973,574 +36362,566 @@ static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, sp_digit div) */ static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #188\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0xbc\n\t" + "\n" + "L_sp_3072_cmp_48_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_3072_cmp_48_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, 
r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + "ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - "ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" 
+ "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], 
#144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" 
- "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne 
r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne 
r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" 
"it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, 
r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - 
"ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - 
"movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif 
/*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -14939,6 +37320,7 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) } #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -14947,34 +37329,45 @@ static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #384\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_3072_cond_sub_96_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x180\n\t" + "blt L_sp_3072_cond_sub_96_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* 
Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -15311,912 +37704,3421 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_dig "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ /* Reduce the number back to 3072 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_3072_mont_reduce_96_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" 
+ "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds 
r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, 
r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" 
+ /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, 
r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc 
r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" 
+ "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr 
r10, [%[a], #112]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str 
r10, [%[a], #124]\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + 
"umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + 
"lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu */ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, 
[%[a], #164]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - "ldr r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, 
#0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# 
a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, 
r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, [%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], #184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, 
#16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" - "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" "adc r4, r4, #0\n\t" - "# a[i+48] += m[48] * mu\n\t" - "ldr r7, [%[m], #192]\n\t" - "ldr r9, [%[a], #192]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #192]\n\t" + /* a[i+52] += m[52] * mu */ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, 
[%[a], #208]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+49] += m[49] * mu\n\t" - "ldr r7, [%[m], #196]\n\t" - "ldr r9, [%[a], #196]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #196]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" "adc r4, r4, #0\n\t" - "# a[i+50] += m[50] * mu\n\t" - "ldr r7, [%[m], #200]\n\t" - "ldr r9, [%[a], #200]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #200]\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+51] += m[51] * mu\n\t" - "ldr r7, [%[m], #204]\n\t" - "ldr r9, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #204]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str 
r10, [%[a], #220]\n\t" "adc r4, r4, #0\n\t" - "# a[i+52] += m[52] * mu\n\t" - "ldr r7, [%[m], #208]\n\t" - "ldr r9, [%[a], #208]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #208]\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+53] += m[53] * mu\n\t" - "ldr r7, [%[m], #212]\n\t" - "ldr r9, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + 
"umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #212]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" "adc r4, r4, #0\n\t" - "# a[i+54] += m[54] * mu\n\t" - "ldr r7, [%[m], #216]\n\t" - "ldr r9, [%[a], #216]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #216]\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+55] += m[55] * mu\n\t" - "ldr r7, [%[m], #220]\n\t" - "ldr r9, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #220]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" "adc r4, r4, #0\n\t" - "# a[i+56] += m[56] * mu\n\t" - "ldr r7, [%[m], #224]\n\t" - "ldr r9, [%[a], #224]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #224]\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+57] += m[57] * mu\n\t" - "ldr r7, [%[m], #228]\n\t" - "ldr r9, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + 
"lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #228]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" "adc r4, r4, #0\n\t" - "# a[i+58] += m[58] * mu\n\t" - "ldr r7, [%[m], #232]\n\t" - "ldr r9, [%[a], #232]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #232]\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+59] += m[59] * mu\n\t" - "ldr r7, [%[m], #236]\n\t" - "ldr r9, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r7, [%[m], #252]\n\t" + "ldr r10, [%[a], #252]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #236]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" "adc r4, r4, #0\n\t" - "# a[i+60] += m[60] * mu\n\t" - "ldr r7, [%[m], #240]\n\t" - "ldr r9, [%[a], #240]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #240]\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r7, [%[m], #256]\n\t" + "ldr r10, [%[a], #256]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+61] += m[61] * mu\n\t" - "ldr r7, [%[m], #244]\n\t" - "ldr r9, [%[a], #244]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+65] += m[65] * mu */ + "ldr r7, [%[m], #260]\n\t" + "ldr r10, 
[%[a], #260]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #244]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #260]\n\t" "adc r4, r4, #0\n\t" - "# a[i+62] += m[62] * mu\n\t" - "ldr r7, [%[m], #248]\n\t" - "ldr r9, [%[a], #248]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #248]\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r7, [%[m], #264]\n\t" + "ldr r10, [%[a], #264]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+63] += m[63] * mu\n\t" - "ldr r7, [%[m], #252]\n\t" - "ldr r9, [%[a], #252]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, 
#0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r7, [%[m], #268]\n\t" + "ldr r10, [%[a], #268]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #252]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #268]\n\t" "adc r4, r4, #0\n\t" - "# a[i+64] += m[64] * mu\n\t" - "ldr r7, [%[m], #256]\n\t" - "ldr r9, [%[a], #256]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #256]\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r7, [%[m], #272]\n\t" + "ldr r10, [%[a], #272]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+65] += m[65] * mu\n\t" - "ldr r7, [%[m], #260]\n\t" - "ldr r9, [%[a], #260]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r7, [%[m], #276]\n\t" + "ldr r10, [%[a], #276]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #260]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #276]\n\t" "adc r4, r4, #0\n\t" - "# a[i+66] += m[66] * mu\n\t" - "ldr r7, [%[m], #264]\n\t" - "ldr r9, [%[a], #264]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #264]\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r7, [%[m], #280]\n\t" + "ldr r10, [%[a], #280]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# 
a[i+67] += m[67] * mu\n\t" - "ldr r7, [%[m], #268]\n\t" - "ldr r9, [%[a], #268]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r7, [%[m], #284]\n\t" + "ldr r10, [%[a], #284]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #268]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #284]\n\t" "adc r4, r4, #0\n\t" - "# a[i+68] += m[68] * mu\n\t" - "ldr r7, [%[m], #272]\n\t" - "ldr r9, [%[a], #272]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #272]\n\t" + /* a[i+72] += m[72] * mu */ + "ldr r7, [%[m], #288]\n\t" + "ldr r10, [%[a], #288]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, 
r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+69] += m[69] * mu\n\t" - "ldr r7, [%[m], #276]\n\t" - "ldr r9, [%[a], #276]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r7, [%[m], #292]\n\t" + "ldr r10, [%[a], #292]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #276]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #292]\n\t" "adc r4, r4, #0\n\t" - "# a[i+70] += m[70] * mu\n\t" - "ldr r7, [%[m], #280]\n\t" - "ldr r9, [%[a], #280]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #280]\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r7, [%[m], #296]\n\t" + "ldr r10, [%[a], #296]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, 
#16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+71] += m[71] * mu\n\t" - "ldr r7, [%[m], #284]\n\t" - "ldr r9, [%[a], #284]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r7, [%[m], #300]\n\t" + "ldr r10, [%[a], #300]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #284]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #300]\n\t" "adc r4, r4, #0\n\t" - "# a[i+72] += m[72] * mu\n\t" - "ldr r7, [%[m], #288]\n\t" - "ldr r9, [%[a], #288]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #288]\n\t" + /* a[i+76] += m[76] * mu */ + "ldr r7, [%[m], #304]\n\t" + "ldr r10, 
[%[a], #304]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+73] += m[73] * mu\n\t" - "ldr r7, [%[m], #292]\n\t" - "ldr r9, [%[a], #292]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r7, [%[m], #308]\n\t" + "ldr r10, [%[a], #308]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #292]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #308]\n\t" "adc r4, r4, #0\n\t" - "# a[i+74] += m[74] * mu\n\t" - "ldr r7, [%[m], #296]\n\t" - "ldr r9, [%[a], #296]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #296]\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r7, [%[m], #312]\n\t" + "ldr r10, [%[a], #312]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+75] += m[75] * mu\n\t" - "ldr r7, [%[m], #300]\n\t" - "ldr r9, [%[a], #300]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r7, [%[m], #316]\n\t" + "ldr r10, [%[a], #316]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #300]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str 
r10, [%[a], #316]\n\t" "adc r4, r4, #0\n\t" - "# a[i+76] += m[76] * mu\n\t" - "ldr r7, [%[m], #304]\n\t" - "ldr r9, [%[a], #304]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #304]\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r7, [%[m], #320]\n\t" + "ldr r10, [%[a], #320]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+77] += m[77] * mu\n\t" - "ldr r7, [%[m], #308]\n\t" - "ldr r9, [%[a], #308]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r7, [%[m], #324]\n\t" + "ldr r10, [%[a], #324]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + 
"umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #308]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #324]\n\t" "adc r4, r4, #0\n\t" - "# a[i+78] += m[78] * mu\n\t" - "ldr r7, [%[m], #312]\n\t" - "ldr r9, [%[a], #312]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #312]\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r7, [%[m], #328]\n\t" + "ldr r10, [%[a], #328]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+79] += m[79] * mu\n\t" - "ldr r7, [%[m], #316]\n\t" - "ldr r9, [%[a], #316]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r7, [%[m], #332]\n\t" + "ldr r10, [%[a], #332]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #316]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #332]\n\t" "adc r4, r4, #0\n\t" - "# a[i+80] += m[80] * mu\n\t" - "ldr r7, [%[m], #320]\n\t" - "ldr r9, [%[a], #320]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #320]\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r7, [%[m], #336]\n\t" + "ldr r10, [%[a], #336]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+81] += m[81] * mu\n\t" - "ldr r7, [%[m], #324]\n\t" - "ldr r9, [%[a], #324]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r7, [%[m], #340]\n\t" + "ldr r10, [%[a], #340]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + 
"lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #324]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #340]\n\t" "adc r4, r4, #0\n\t" - "# a[i+82] += m[82] * mu\n\t" - "ldr r7, [%[m], #328]\n\t" - "ldr r9, [%[a], #328]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #328]\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r7, [%[m], #344]\n\t" + "ldr r10, [%[a], #344]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+83] += m[83] * mu\n\t" - "ldr r7, [%[m], #332]\n\t" - "ldr r9, [%[a], #332]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r7, [%[m], #348]\n\t" + "ldr r10, [%[a], #348]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, 
r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #332]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #348]\n\t" "adc r4, r4, #0\n\t" - "# a[i+84] += m[84] * mu\n\t" - "ldr r7, [%[m], #336]\n\t" - "ldr r9, [%[a], #336]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #336]\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r7, [%[m], #352]\n\t" + "ldr r10, [%[a], #352]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+85] += m[85] * mu\n\t" - "ldr r7, [%[m], #340]\n\t" - "ldr r9, [%[a], #340]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+89] += m[89] * mu */ + "ldr r7, [%[m], #356]\n\t" + "ldr r10, 
[%[a], #356]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #340]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #356]\n\t" "adc r4, r4, #0\n\t" - "# a[i+86] += m[86] * mu\n\t" - "ldr r7, [%[m], #344]\n\t" - "ldr r9, [%[a], #344]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #344]\n\t" + /* a[i+90] += m[90] * mu */ + "ldr r7, [%[m], #360]\n\t" + "ldr r10, [%[a], #360]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+87] += m[87] * mu\n\t" - "ldr r7, [%[m], #348]\n\t" - "ldr r9, [%[a], #348]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, 
#0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+91] += m[91] * mu */ + "ldr r7, [%[m], #364]\n\t" + "ldr r10, [%[a], #364]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #348]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #364]\n\t" "adc r4, r4, #0\n\t" - "# a[i+88] += m[88] * mu\n\t" - "ldr r7, [%[m], #352]\n\t" - "ldr r9, [%[a], #352]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #352]\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r7, [%[m], #368]\n\t" + "ldr r10, [%[a], #368]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+89] += m[89] * mu\n\t" - "ldr r7, [%[m], #356]\n\t" - "ldr r9, [%[a], #356]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r7, [%[m], #372]\n\t" + "ldr r10, [%[a], #372]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #356]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #372]\n\t" "adc r4, r4, #0\n\t" - "# a[i+90] += m[90] * mu\n\t" - "ldr r7, [%[m], #360]\n\t" - "ldr r9, [%[a], #360]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #360]\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r7, [%[m], #376]\n\t" + "ldr r10, [%[a], #376]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# 
a[i+91] += m[91] * mu\n\t" - "ldr r7, [%[m], #364]\n\t" - "ldr r9, [%[a], #364]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #364]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+92] += m[92] * mu\n\t" - "ldr r7, [%[m], #368]\n\t" - "ldr r9, [%[a], #368]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #368]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #376]\n\t" "adc r5, r5, #0\n\t" - "# a[i+93] += m[93] * mu\n\t" - "ldr r7, [%[m], #372]\n\t" - "ldr r9, [%[a], #372]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #372]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+94] += m[94] * mu\n\t" - "ldr r7, [%[m], #376]\n\t" - "ldr r9, [%[a], #376]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #376]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+95] += m[95] * mu\n\t" + /* a[i+95] += m[95] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r7, [%[m], #380]\n\t" - "ldr r9, [%[a], #380]\n\t" +#else + "ldr r11, [%[m], #380]\n\t" +#endif + "ldr r10, [%[a], #380]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds 
r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #380]\n\t" - "ldr r9, [%[a], #384]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #384]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #380]\n\t" + "ldr r10, [%[a], #384]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #384]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #384\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x180\n\t" + "blt L_sp_3072_mont_reduce_96_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca); + sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -16256,31 +41158,30 @@ SP_NOINLINE static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #384\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x180\n\t" + "\n" + "L_sp_3072_sub_96_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_3072_sub_96_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -16290,190 +41191,187 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, 
r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" 
"sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, 
r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" 
"sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -16485,57 +41383,184 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, */ static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], 
r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_3072_word_96_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_3072_word_96_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, 
r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -16648,1102 +41673,1100 @@ static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m) */ static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #380\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r4, #0x1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, r4, #0x7c\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #380]\n\t" - "ldr r5, [%[b], #380]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r5, [%[b], #376]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #372]\n\t" - "ldr r5, [%[b], #372]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r5, [%[b], #368]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - 
"movne r3, r7\n\t" - "ldr r4, [%[a], #364]\n\t" - "ldr r5, [%[b], #364]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #360]\n\t" - "ldr r5, [%[b], #360]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #356]\n\t" - "ldr r5, [%[b], #356]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r5, [%[b], #352]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #348]\n\t" - "ldr r5, [%[b], #348]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r5, [%[b], #344]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #340]\n\t" - "ldr r5, [%[b], #340]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r5, [%[b], #336]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #332]\n\t" - "ldr r5, [%[b], 
#332]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r5, [%[b], #328]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #324]\n\t" - "ldr r5, [%[b], #324]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r5, [%[b], #320]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #316]\n\t" - "ldr r5, [%[b], #316]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r5, [%[b], #312]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #308]\n\t" - "ldr r5, [%[b], #308]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r5, [%[b], #304]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #300]\n\t" - "ldr r5, [%[b], #300]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, 
r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r5, [%[b], #296]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #292]\n\t" - "ldr r5, [%[b], #292]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #288]\n\t" - "ldr r5, [%[b], #288]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #284]\n\t" - "ldr r5, [%[b], #284]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r5, [%[b], #280]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #276]\n\t" - "ldr r5, [%[b], #276]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r5, [%[b], #272]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #268]\n\t" - "ldr r5, [%[b], #268]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - 
"movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r5, [%[b], #264]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #260]\n\t" - "ldr r5, [%[b], #260]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r5, [%[b], #256]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #252]\n\t" - "ldr r5, [%[b], #252]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[b], #248]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[b], #244]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[b], #240]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #236]\n\t" - "ldr r5, [%[b], #236]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, 
[%[a], #232]\n\t" - "ldr r5, [%[b], #232]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[b], #228]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[b], #224]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #220]\n\t" - "ldr r5, [%[b], #220]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[b], #216]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[b], #212]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[b], #208]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #204]\n\t" - "ldr r5, [%[b], #204]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[b], #200]\n\t" - "and r4, r4, r3\n\t" - 
"and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[b], #196]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[b], #192]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - "ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi 
%[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - 
"movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], 
#100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it 
hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - 
"movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, 
r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "mov r4, #0x17c\n\t" #endif - - return r; + "\n" + "L_sp_3072_cmp_96_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_3072_cmp_96_words_%=\n\t" + "eor r2, r2, r3\n\t" +#else + "ldr r12, [%[a], #380]\n\t" + "ldr lr, [%[b], #380]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #376]\n\t" + "ldr lr, [%[b], #376]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #372]\n\t" + "ldr lr, [%[b], #372]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #368]\n\t" + "ldr lr, [%[b], #368]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #364]\n\t" + "ldr lr, [%[b], #364]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #360]\n\t" + "ldr lr, [%[b], #360]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, 
r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #356]\n\t" + "ldr lr, [%[b], #356]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #352]\n\t" + "ldr lr, [%[b], #352]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #348]\n\t" + "ldr lr, [%[b], #348]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #344]\n\t" + "ldr lr, [%[b], #344]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #340]\n\t" + "ldr lr, [%[b], #340]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #336]\n\t" + "ldr lr, [%[b], #336]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #332]\n\t" + "ldr lr, [%[b], #332]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #328]\n\t" + "ldr lr, [%[b], #328]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, 
r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #324]\n\t" + "ldr lr, [%[b], #324]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #320]\n\t" + "ldr lr, [%[b], #320]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #316]\n\t" + "ldr lr, [%[b], #316]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #312]\n\t" + "ldr lr, [%[b], #312]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #308]\n\t" + "ldr lr, [%[b], #308]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #304]\n\t" + "ldr lr, [%[b], #304]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #300]\n\t" + "ldr lr, [%[b], #300]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #296]\n\t" + "ldr lr, [%[b], #296]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #292]\n\t" + "ldr lr, [%[b], 
#292]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #288]\n\t" + "ldr lr, [%[b], #288]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #284]\n\t" + "ldr lr, [%[b], #284]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #280]\n\t" + "ldr lr, [%[b], #280]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #276]\n\t" + "ldr lr, [%[b], #276]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #272]\n\t" + "ldr lr, [%[b], #272]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #268]\n\t" + "ldr lr, [%[b], #268]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #264]\n\t" + "ldr lr, [%[b], #264]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #260]\n\t" + "ldr lr, [%[b], #260]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it 
hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #256]\n\t" + "ldr lr, [%[b], #256]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #252]\n\t" + "ldr lr, [%[b], #252]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #248]\n\t" + "ldr lr, [%[b], #248]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #244]\n\t" + "ldr lr, [%[b], #244]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #240]\n\t" + "ldr lr, [%[b], #240]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #236]\n\t" + "ldr lr, [%[b], #236]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #232]\n\t" + "ldr lr, [%[b], #232]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #228]\n\t" + "ldr lr, [%[b], #228]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, 
r5\n\t" + "ldr r12, [%[a], #224]\n\t" + "ldr lr, [%[b], #224]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #220]\n\t" + "ldr lr, [%[b], #220]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #216]\n\t" + "ldr lr, [%[b], #216]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #212]\n\t" + "ldr lr, [%[b], #212]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #208]\n\t" + "ldr lr, [%[b], #208]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #204]\n\t" + "ldr lr, [%[b], #204]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #200]\n\t" + "ldr lr, [%[b], #200]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #196]\n\t" + "ldr lr, [%[b], #196]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #192]\n\t" + "ldr lr, [%[b], #192]\n\t" + "and r12, r12, r3\n\t" + 
"and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + "ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" + "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" 
+ "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr 
lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + 
"it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr 
r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, 
r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -18274,35 +43297,33 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" + "mov lr, #0\n\t" "mov r6, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_3072_cond_add_48_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r7, r7\n\t" - "str r4, [%[r], r6]\n\t" - "add r6, r6, #4\n\t" - "cmp r6, #192\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0xc0\n\t" + "blt L_sp_3072_cond_add_48_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "cc", "r12", "lr", "r4", "r5", "r6" /* "cc": adds/adcs/cmp change the condition flags */ ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifndef WOLFSSL_SP_SMALL +#else /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -18311,480 +43332,187 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r8, #0\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" -#else - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" -#endif + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" -#else - "strd r4, r5, [%[r], #0]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" -#else - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" -#else - "strd r4, r5, [%[r], #8]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" -#else - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" -#else - "strd r4, r5, [%[r], 
#16]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" -#else - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #28]\n\t" -#else - "strd r4, r5, [%[r], #24]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" -#else - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" -#else - "strd r4, r5, [%[r], #32]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" -#else - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" -#else - "strd r4, r5, [%[r], #40]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" -#else - "ldrd 
r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" -#else - "strd r4, r5, [%[r], #48]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" -#else - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #56]\n\t" - "str r5, [%[r], #60]\n\t" -#else - "strd r4, r5, [%[r], #56]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" -#else - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" -#else - "strd r4, r5, [%[r], #64]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" -#else - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, 
r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #72]\n\t" - "str r5, [%[r], #76]\n\t" -#else - "strd r4, r5, [%[r], #72]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" -#else - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" -#else - "strd r4, r5, [%[r], #80]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" -#else - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #88]\n\t" - "str r5, [%[r], #92]\n\t" -#else - "strd r4, r5, [%[r], #88]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" -#else - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" -#else - "strd r4, r5, [%[r], #96]\n\t" -#endif -#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" -#else - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #104]\n\t" - "str r5, [%[r], #108]\n\t" -#else - "strd r4, r5, [%[r], #104]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" -#else - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" -#else - "strd r4, r5, [%[r], #112]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" -#else - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #120]\n\t" - "str r5, [%[r], #124]\n\t" -#else - "strd r4, r5, [%[r], #120]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[a], #132]\n\t" - "ldr r6, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" -#else 
- "ldrd r4, r5, [%[a], #128]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #128]\n\t" - "str r5, [%[r], #132]\n\t" -#else - "strd r4, r5, [%[r], #128]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[a], #140]\n\t" - "ldr r6, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" -#else - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #136]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #136]\n\t" - "str r5, [%[r], #140]\n\t" -#else - "strd r4, r5, [%[r], #136]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[a], #148]\n\t" - "ldr r6, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" -#else - "ldrd r4, r5, [%[a], #144]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #144]\n\t" - "str r5, [%[r], #148]\n\t" -#else - "strd r4, r5, [%[r], #144]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[a], #156]\n\t" - "ldr r6, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" -#else - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #152]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, 
%[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #152]\n\t" - "str r5, [%[r], #156]\n\t" -#else - "strd r4, r5, [%[r], #152]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[a], #164]\n\t" - "ldr r6, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" -#else - "ldrd r4, r5, [%[a], #160]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #160]\n\t" - "str r5, [%[r], #164]\n\t" -#else - "strd r4, r5, [%[r], #160]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[a], #172]\n\t" - "ldr r6, [%[b], #168]\n\t" - "ldr r7, [%[b], #172]\n\t" -#else - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #168]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #168]\n\t" - "str r5, [%[r], #172]\n\t" -#else - "strd r4, r5, [%[r], #168]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[a], #180]\n\t" - "ldr r6, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" -#else - "ldrd r4, r5, [%[a], #176]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #176]\n\t" - "str r5, 
[%[r], #180]\n\t" -#else - "strd r4, r5, [%[r], #176]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[a], #188]\n\t" - "ldr r6, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" -#else - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #184]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #184]\n\t" - "str r5, [%[r], #188]\n\t" -#else - "strd r4, r5, [%[r], #184]\n\t" -#endif - "adc %[c], r8, r8\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* !WOLFSSL_SP_SMALL */ +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -19102,586 +43830,586 @@ int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod, static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "rsb r6, %[n], #31\n\t" - "ldr r3, [%[a], #380]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #376]\n\t" - "str r4, [%[r], #384]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #372]\n\t" - "str r3, [%[r], #380]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #380]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #376]\n\t" + "str r6, [%[r], #384]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #368]\n\t" - "str r2, [%[r], #376]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #364]\n\t" - "str r4, [%[r], #372]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #360]\n\t" - "str r3, [%[r], #368]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #372]\n\t" + "str r5, [%[r], #380]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #368]\n\t" + "str r4, [%[r], #376]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #364]\n\t" + "str r6, [%[r], #372]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #356]\n\t" - "str r2, [%[r], #364]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #352]\n\t" - "str r4, [%[r], #360]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, 
r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #348]\n\t" - "str r3, [%[r], #356]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #360]\n\t" + "str r5, [%[r], #368]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #356]\n\t" + "str r4, [%[r], #364]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #352]\n\t" + "str r6, [%[r], #360]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #344]\n\t" - "str r2, [%[r], #352]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #340]\n\t" - "str r4, [%[r], #348]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #336]\n\t" - "str r3, [%[r], #344]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #348]\n\t" + "str r5, [%[r], #356]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #344]\n\t" + "str r4, [%[r], #352]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #340]\n\t" + "str r6, [%[r], #348]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #332]\n\t" - "str r2, [%[r], #340]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #328]\n\t" - "str r4, [%[r], #336]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #324]\n\t" - "str r3, [%[r], #332]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #336]\n\t" + "str r5, [%[r], #344]\n\t" + "lsr 
r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #332]\n\t" + "str r4, [%[r], #340]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #328]\n\t" + "str r6, [%[r], #336]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #320]\n\t" - "str r2, [%[r], #328]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #316]\n\t" - "str r4, [%[r], #324]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #312]\n\t" - "str r3, [%[r], #320]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #324]\n\t" + "str r5, [%[r], #332]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #320]\n\t" + "str r4, [%[r], #328]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #316]\n\t" + "str r6, [%[r], #324]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #308]\n\t" - "str r2, [%[r], #316]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #304]\n\t" - "str r4, [%[r], #312]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #300]\n\t" - "str r3, [%[r], #308]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #312]\n\t" + "str r5, [%[r], #320]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #308]\n\t" + "str r4, [%[r], #316]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + 
"orr r6, r6, r3\n\t" + "ldr r4, [%[a], #304]\n\t" + "str r6, [%[r], #312]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #296]\n\t" - "str r2, [%[r], #304]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #292]\n\t" - "str r4, [%[r], #300]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #288]\n\t" - "str r3, [%[r], #296]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #300]\n\t" + "str r5, [%[r], #308]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #296]\n\t" + "str r4, [%[r], #304]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #292]\n\t" + "str r6, [%[r], #300]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #284]\n\t" - "str r2, [%[r], #292]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #280]\n\t" - "str r4, [%[r], #288]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #276]\n\t" - "str r3, [%[r], #284]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #288]\n\t" + "str r5, [%[r], #296]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #284]\n\t" + "str r4, [%[r], #292]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #280]\n\t" + "str r6, [%[r], #288]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #272]\n\t" - "str r2, [%[r], 
#280]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #268]\n\t" - "str r4, [%[r], #276]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #264]\n\t" - "str r3, [%[r], #272]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #276]\n\t" + "str r5, [%[r], #284]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #272]\n\t" + "str r4, [%[r], #280]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #268]\n\t" + "str r6, [%[r], #276]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #260]\n\t" - "str r2, [%[r], #268]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #256]\n\t" - "str r4, [%[r], #264]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #252]\n\t" - "str r3, [%[r], #260]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #264]\n\t" + "str r5, [%[r], #272]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #260]\n\t" + "str r4, [%[r], #268]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #256]\n\t" + "str r6, [%[r], #264]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #248]\n\t" - "str r2, [%[r], #256]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #244]\n\t" - "str r4, [%[r], #252]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, 
r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #240]\n\t" - "str r3, [%[r], #248]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #252]\n\t" + "str r5, [%[r], #260]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #248]\n\t" + "str r4, [%[r], #256]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #244]\n\t" + "str r6, [%[r], #252]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #236]\n\t" - "str r2, [%[r], #244]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #232]\n\t" - "str r4, [%[r], #240]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #228]\n\t" - "str r3, [%[r], #236]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #240]\n\t" + "str r5, [%[r], #248]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #236]\n\t" + "str r4, [%[r], #244]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #232]\n\t" + "str r6, [%[r], #240]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #224]\n\t" - "str r2, [%[r], #232]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #220]\n\t" - "str r4, [%[r], #228]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #216]\n\t" - "str r3, [%[r], #224]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #228]\n\t" + "str r5, [%[r], #236]\n\t" + 
"lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #224]\n\t" + "str r4, [%[r], #232]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #220]\n\t" + "str r6, [%[r], #228]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #212]\n\t" - "str r2, [%[r], #220]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #208]\n\t" - "str r4, [%[r], #216]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #204]\n\t" - "str r3, [%[r], #212]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #216]\n\t" + "str r5, [%[r], #224]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #212]\n\t" + "str r4, [%[r], #220]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #208]\n\t" + "str r6, [%[r], #216]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #200]\n\t" - "str r2, [%[r], #208]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #196]\n\t" - "str r4, [%[r], #204]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #192]\n\t" - "str r3, [%[r], #200]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #204]\n\t" + "str r5, [%[r], #212]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #200]\n\t" + "str r4, [%[r], #208]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" 
+ "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #196]\n\t" + "str r6, [%[r], #204]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #188]\n\t" - "str r2, [%[r], #196]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #184]\n\t" - "str r4, [%[r], #192]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #180]\n\t" - "str r3, [%[r], #188]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #192]\n\t" + "str r5, [%[r], #200]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #188]\n\t" + "str r4, [%[r], #196]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #184]\n\t" + "str r6, [%[r], #192]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #176]\n\t" - "str r2, [%[r], #184]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #172]\n\t" - "str r4, [%[r], #180]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #168]\n\t" - "str r3, [%[r], #176]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #180]\n\t" + "str r5, [%[r], #188]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #176]\n\t" + "str r4, [%[r], #184]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #172]\n\t" + "str r6, [%[r], #180]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #164]\n\t" - "str r2, [%[r], 
#172]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #160]\n\t" - "str r4, [%[r], #168]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #156]\n\t" - "str r3, [%[r], #164]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #168]\n\t" + "str r5, [%[r], #176]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #164]\n\t" + "str r4, [%[r], #172]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #160]\n\t" + "str r6, [%[r], #168]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #152]\n\t" - "str r2, [%[r], #160]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #148]\n\t" - "str r4, [%[r], #156]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #144]\n\t" - "str r3, [%[r], #152]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #156]\n\t" + "str r5, [%[r], #164]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #152]\n\t" + "str r4, [%[r], #160]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #148]\n\t" + "str r6, [%[r], #156]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #140]\n\t" - "str r2, [%[r], #148]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #136]\n\t" - "str r4, [%[r], #144]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, 
r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #132]\n\t" - "str r3, [%[r], #140]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #144]\n\t" + "str r5, [%[r], #152]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #140]\n\t" + "str r4, [%[r], #148]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #136]\n\t" + "str r6, [%[r], #144]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #128]\n\t" - "str r2, [%[r], #136]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #124]\n\t" - "str r4, [%[r], #132]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #120]\n\t" - "str r3, [%[r], #128]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #132]\n\t" + "str r5, [%[r], #140]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #128]\n\t" + "str r4, [%[r], #136]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #124]\n\t" + "str r6, [%[r], #132]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #116]\n\t" - "str r2, [%[r], #124]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #112]\n\t" - "str r4, [%[r], #120]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #108]\n\t" - "str r3, [%[r], #116]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #120]\n\t" + "str r5, [%[r], #128]\n\t" + 
"lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #116]\n\t" + "str r4, [%[r], #124]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #112]\n\t" + "str r6, [%[r], #120]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #104]\n\t" - "str r2, [%[r], #112]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #100]\n\t" - "str r4, [%[r], #108]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #96]\n\t" - "str r3, [%[r], #104]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #108]\n\t" + "str r5, [%[r], #116]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #104]\n\t" + "str r4, [%[r], #112]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #100]\n\t" + "str r6, [%[r], #108]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #92]\n\t" - "str r2, [%[r], #100]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #88]\n\t" - "str r4, [%[r], #96]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #84]\n\t" - "str r3, [%[r], #92]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #96]\n\t" + "str r5, [%[r], #104]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #92]\n\t" + "str r4, [%[r], #100]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr 
r6, r6, r3\n\t" + "ldr r4, [%[a], #88]\n\t" + "str r6, [%[r], #96]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #80]\n\t" - "str r2, [%[r], #88]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #76]\n\t" - "str r4, [%[r], #84]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #72]\n\t" - "str r3, [%[r], #80]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #84]\n\t" + "str r5, [%[r], #92]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #80]\n\t" + "str r4, [%[r], #88]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #76]\n\t" + "str r6, [%[r], #84]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #68]\n\t" - "str r2, [%[r], #76]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #64]\n\t" - "str r4, [%[r], #72]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #60]\n\t" - "str r3, [%[r], #68]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #72]\n\t" + "str r5, [%[r], #80]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #68]\n\t" + "str r4, [%[r], #76]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #64]\n\t" + "str r6, [%[r], #72]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #56]\n\t" - "str r2, [%[r], #64]\n\t" - "lsr r5, r3, #1\n\t" - 
"lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #52]\n\t" - "str r4, [%[r], #60]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #48]\n\t" - "str r3, [%[r], #56]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #60]\n\t" + "str r5, [%[r], #68]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #56]\n\t" + "str r4, [%[r], #64]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #52]\n\t" + "str r6, [%[r], #60]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #44]\n\t" - "str r2, [%[r], #52]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #40]\n\t" - "str r4, [%[r], #48]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #36]\n\t" - "str r3, [%[r], #44]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #48]\n\t" + "str r5, [%[r], #56]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #44]\n\t" + "str r4, [%[r], #52]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #40]\n\t" + "str r6, [%[r], #48]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #32]\n\t" - "str r2, [%[r], #40]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #28]\n\t" - "str r4, [%[r], #36]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #24]\n\t" 
- "str r3, [%[r], #32]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #36]\n\t" + "str r5, [%[r], #44]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #32]\n\t" + "str r4, [%[r], #40]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #28]\n\t" + "str r6, [%[r], #36]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #20]\n\t" - "str r2, [%[r], #28]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #16]\n\t" - "str r4, [%[r], #24]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #12]\n\t" - "str r3, [%[r], #20]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #24]\n\t" + "str r5, [%[r], #32]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #20]\n\t" + "str r4, [%[r], #28]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #16]\n\t" + "str r6, [%[r], #24]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #8]\n\t" - "str r2, [%[r], #16]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #4]\n\t" - "str r4, [%[r], #12]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #0]\n\t" - "str r3, [%[r], #8]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #12]\n\t" + "str r5, [%[r], #20]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + 
"ldr r5, [%[a], #8]\n\t" + "str r4, [%[r], #16]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #4]\n\t" + "str r6, [%[r], #12]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "str r4, [%[r], #0]\n\t" - "str r2, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a]]\n\t" + "str r5, [%[r], #8]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "str r6, [%[r]]\n\t" + "str r4, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12" ); } @@ -20099,240 +44827,237 @@ static void sp_4096_to_bin_128(sp_digit* r, byte* a) */ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, 
r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, 
r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, 
r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm 
%[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" 
"sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, 
r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -20341,244 +45066,240 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs 
r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, 
r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" 
"adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, 
r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + 
"adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, 
{r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* Multiply a and b into r. (r = a * b) @@ -20664,15 +45385,14 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #512\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x200\n\t" + "\n" + "L_sp_4096_add_128_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -20681,15 +45401,15 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_4096_add_128_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -20701,29 +45421,29 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, */ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov 
r14, #0\n\t" - "add r12, %[a], #512\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x200\n\t" + "\n" + "L_sp_4096_sub_in_pkace_128_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_4096_sub_in_pkace_128_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ @@ -20737,54 +45457,81 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #1024\n\t" + "sub sp, sp, #0x400\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #508\n\t" + "\n" + "L_sp_4096_mul_128_outer_%=: \n\t" + "subs r3, r5, #0x1fc\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_4096_mul_128_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, 
#0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #512\n\t" - "beq 3f\n\t" + "cmp r3, #0x200\n\t" + "beq L_sp_4096_mul_128_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_4096_mul_128_inner_%=\n\t" + "\n" + "L_sp_4096_mul_128_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #1016\n\t" - "ble 1b\n\t" + "cmp r5, #0x3f8\n\t" + "ble L_sp_4096_mul_128_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_4096_mul_128_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_4096_mul_128_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -20796,77 +45543,132 @@ static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_4096_sqr_128(sp_digit* r, const 
sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #1024\n\t" + "sub sp, sp, #0x400\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #508\n\t" + "\n" + "L_sp_4096_sqr_128_outer_%=: \n\t" + "subs r3, r5, #0x1fc\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_4096_sqr_128_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_4096_sqr_128_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal 
L_sp_4096_sqr_128_op_done_%=\n\t" + "\n" + "L_sp_4096_sqr_128_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_4096_sqr_128_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #512\n\t" - "beq 3f\n\t" + "cmp r3, #0x200\n\t" + "beq L_sp_4096_sqr_128_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_4096_sqr_128_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_4096_sqr_128_inner_%=\n\t" + "\n" + "L_sp_4096_sqr_128_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #1016\n\t" - "ble 1b\n\t" + "cmp r5, #0x3f8\n\t" + "ble L_sp_4096_sqr_128_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_4096_sqr_128_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" 
(r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_4096_sqr_128_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -20891,947 +45693,5222 @@ static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho) *rho = (sp_digit)0 - x; } +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ -static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_4096_mul_d_128_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" 
+ "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" - "cmp r9, #512\n\t" - "blt 1b\n\t" + "cmp r9, #0x200\n\t" + "blt L_sp_4096_mul_d_128_word_%=\n\t" "str r3, [%[r], #512]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul 
r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[12] * B\n\t" + /* A[12] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[13] * B\n\t" + /* A[13] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[14] * B\n\t" + /* A[14] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[15] * B\n\t" + /* A[15] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[16] * B\n\t" + /* A[16] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[17] * B\n\t" + /* A[17] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[18] * B\n\t" + /* A[18] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[19] * B\n\t" + /* A[19] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, 
#0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[20] * B\n\t" + /* A[20] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[21] * B\n\t" + /* A[21] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + 
"mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[22] * B\n\t" + /* A[22] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[23] * B\n\t" + /* A[23] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, 
#0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[24] * B\n\t" + /* A[24] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[25] * B\n\t" + /* A[25] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + 
"adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[26] * B\n\t" + /* A[26] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[27] * B\n\t" + /* A[27] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, 
#0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[28] * B\n\t" + /* A[28] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[29] * B\n\t" + /* A[29] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[30] * B\n\t" + /* A[30] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[31] * B\n\t" + /* A[31] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[32] * B\n\t" + /* A[32] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[33] * B\n\t" + /* A[33] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[34] * B\n\t" + /* A[34] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[35] * B\n\t" + /* A[35] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[36] * B\n\t" + /* A[36] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[37] * B\n\t" + /* A[37] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[38] * B\n\t" + /* A[38] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[39] * B\n\t" + /* A[39] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[40] * B\n\t" + /* A[40] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[41] * B\n\t" + /* A[41] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[42] * B\n\t" + /* A[42] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[43] * B\n\t" + /* A[43] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[44] * B\n\t" + /* A[44] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[45] * B\n\t" + /* A[45] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[46] * B\n\t" + /* A[46] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[47] * B\n\t" + /* A[47] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[48] * B\n\t" + /* A[48] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[49] * B\n\t" + /* A[49] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[50] * B\n\t" + /* A[50] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[51] * B\n\t" + /* A[51] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[52] * B\n\t" + /* A[52] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[53] * B\n\t" + /* A[53] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[54] * B\n\t" + /* A[54] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[55] * B\n\t" + /* A[55] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[56] * B\n\t" + /* A[56] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[57] * B\n\t" + /* A[57] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + 
"lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[58] * B\n\t" + /* A[58] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[59] * B\n\t" + /* A[59] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, 
%[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[60] * B\n\t" + /* A[60] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[61] * B\n\t" + /* A[61] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], 
#16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[62] * B\n\t" + /* A[62] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[63] * B\n\t" + /* A[63] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + 
"lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[64] * B\n\t" + /* A[64] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[65] * B\n\t" + /* A[65] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[66] * B\n\t" + /* A[66] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[67] * B\n\t" + /* A[67] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[68] * B\n\t" + /* A[68] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[69] * B\n\t" + /* A[69] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[70] * B\n\t" + /* A[70] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[71] * B\n\t" + /* A[71] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[72] * B\n\t" + /* A[72] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[73] * B\n\t" + /* A[73] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[74] * B\n\t" + /* A[74] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[75] * B\n\t" + /* A[75] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[76] * B\n\t" + /* A[76] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[77] * B\n\t" + /* A[77] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[78] * B\n\t" + /* A[78] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[79] * B\n\t" + /* A[79] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[80] * B\n\t" + /* A[80] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[81] * B\n\t" + /* A[81] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[82] * B\n\t" + /* A[82] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[83] * B\n\t" + /* A[83] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[84] * B\n\t" + /* A[84] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[85] * B\n\t" + /* A[85] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[86] * B\n\t" + /* A[86] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[87] * B\n\t" + /* A[87] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[88] * B\n\t" + /* A[88] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[89] * B\n\t" + /* A[89] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[90] * B\n\t" + /* A[90] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[91] * B\n\t" + /* A[91] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[92] * B\n\t" + /* A[92] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[93] * B\n\t" + /* A[93] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[94] * B\n\t" + /* A[94] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[95] * B\n\t" + /* A[95] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[96] * B\n\t" + /* A[96] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[97] * B\n\t" + /* A[97] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[98] * B\n\t" + /* A[98] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[99] * B\n\t" + /* A[99] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[100] * B\n\t" + /* A[100] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[101] * B\n\t" + /* A[101] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, 
r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[102] * B\n\t" + /* A[102] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[103] * B\n\t" + /* A[103] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[104] * B\n\t" + /* A[104] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[105] * B\n\t" + /* A[105] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[106] * B\n\t" + /* A[106] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[107] * B\n\t" + /* A[107] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, 
r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[108] * B\n\t" + /* A[108] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[109] * B\n\t" + /* A[109] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[110] * B\n\t" + /* A[110] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[111] * B\n\t" + /* A[111] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[112] * B\n\t" + /* A[112] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[113] * B\n\t" + /* A[113] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, 
r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[114] * B\n\t" + /* A[114] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[115] * B\n\t" + /* A[115] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[116] * B\n\t" + /* A[116] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[117] * B\n\t" + /* A[117] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
+#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[118] * B\n\t" + /* A[118] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[119] * B\n\t" + /* A[119] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else 
"umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[120] * B\n\t" + /* A[120] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[121] * B\n\t" + /* A[121] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull 
r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[122] * B\n\t" + /* A[122] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[123] * B\n\t" + /* A[123] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, 
%[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[124] * B\n\t" + /* A[124] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[125] * B\n\t" + /* A[125] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], 
r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[126] * B\n\t" + /* A[126] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[127] * B\n\t" + /* A[127] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" +#endif "str r4, [%[r]], #4\n\t" "str r5, [%[r]]\n\t" - : [r] "+r" (r), 
[a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* WOLFSSL_SP_SMALL */ #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 4096 bits, just need to subtract. @@ -21848,6 +50925,7 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) } #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -21856,34 +50934,45 @@ static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #512\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_4096_cond_sub_128_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x200\n\t" + "blt L_sp_4096_cond_sub_128_words_%=\n\t" + "mov %[r], 
r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "cc" /* subs/sbcs/cmp in the loop modify the condition flags */ ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -22332,1200 +51421,4541 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_di "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ /* Reduce the number back to 4096 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_4096_mont_reduce_128_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, 
#16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, 
r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" 
+#endif "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull 
r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, 
#16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, 
r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], 
#64]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else 
"umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" + 
"lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul 
r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" + /* a[i+28] += m[28] * mu */ + 
"ldr r7, [%[m], #112]\n\t" + "ldr r10, [%[a], #112]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], 
#104]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+31] += m[31] * mu */ + "ldr r7, [%[m], #124]\n\t" + "ldr r10, [%[a], #124]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" 
+#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" + /* a[i+32] += m[32] * mu */ + "ldr r7, [%[m], #128]\n\t" + "ldr r10, [%[a], #128]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+33] += m[33] * mu */ + "ldr r7, [%[m], #132]\n\t" + "ldr r10, [%[a], #132]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, 
r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #132]\n\t" "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" + /* a[i+34] += m[34] * mu */ + "ldr r7, [%[m], #136]\n\t" + "ldr r10, [%[a], #136]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" - "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #136]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+35] += m[35] * mu */ + "ldr r7, [%[m], #140]\n\t" + "ldr r10, [%[a], #140]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + 
"adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #140]\n\t" "adc r4, r4, #0\n\t" - "# a[i+32] += m[32] * mu\n\t" - "ldr r7, [%[m], #128]\n\t" - "ldr r9, [%[a], #128]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #128]\n\t" + /* a[i+36] += m[36] * mu */ + "ldr r7, [%[m], #144]\n\t" + "ldr r10, [%[a], #144]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+33] += m[33] * mu\n\t" - "ldr r7, [%[m], #132]\n\t" - "ldr r9, [%[a], #132]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #144]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+37] += m[37] * mu */ + "ldr r7, [%[m], #148]\n\t" + "ldr r10, [%[a], #148]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #132]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #148]\n\t" "adc r4, r4, #0\n\t" - "# a[i+34] += m[34] * mu\n\t" - "ldr r7, [%[m], #136]\n\t" - "ldr r9, [%[a], #136]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #136]\n\t" + /* a[i+38] += m[38] * mu */ + "ldr r7, [%[m], #152]\n\t" + "ldr r10, [%[a], #152]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+35] += m[35] * mu\n\t" - "ldr r7, [%[m], #140]\n\t" - "ldr r9, [%[a], #140]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #152]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+39] += m[39] * mu */ + "ldr r7, [%[m], #156]\n\t" + "ldr r10, [%[a], #156]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #140]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #156]\n\t" "adc r4, r4, #0\n\t" - "# a[i+36] += m[36] * mu\n\t" - "ldr r7, [%[m], #144]\n\t" - "ldr r9, [%[a], #144]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #144]\n\t" + /* a[i+40] += m[40] * mu */ + "ldr r7, [%[m], #160]\n\t" + "ldr r10, [%[a], #160]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+37] += m[37] * mu\n\t" - "ldr r7, [%[m], #148]\n\t" - "ldr r9, [%[a], #148]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #160]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+41] += m[41] * mu 
*/ + "ldr r7, [%[m], #164]\n\t" + "ldr r10, [%[a], #164]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #148]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #164]\n\t" "adc r4, r4, #0\n\t" - "# a[i+38] += m[38] * mu\n\t" - "ldr r7, [%[m], #152]\n\t" - "ldr r9, [%[a], #152]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #152]\n\t" + /* a[i+42] += m[42] * mu */ + "ldr r7, [%[m], #168]\n\t" + "ldr r10, [%[a], #168]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+39] += m[39] * mu\n\t" - "ldr r7, [%[m], #156]\n\t" - "ldr r9, [%[a], #156]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, 
r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #168]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+43] += m[43] * mu */ + "ldr r7, [%[m], #172]\n\t" + "ldr r10, [%[a], #172]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #156]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #172]\n\t" "adc r4, r4, #0\n\t" - "# a[i+40] += m[40] * mu\n\t" - "ldr r7, [%[m], #160]\n\t" - "ldr r9, [%[a], #160]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #160]\n\t" + /* a[i+44] += m[44] * mu */ + "ldr r7, [%[m], #176]\n\t" + "ldr r10, [%[a], #176]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+41] += m[41] * mu\n\t" - "ldr r7, [%[m], #164]\n\t" - "ldr r9, [%[a], #164]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #176]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+45] += m[45] * mu */ + "ldr r7, [%[m], #180]\n\t" + "ldr r10, [%[a], #180]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #164]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #180]\n\t" "adc r4, r4, #0\n\t" - "# a[i+42] += m[42] * mu\n\t" - "ldr r7, [%[m], #168]\n\t" - "ldr r9, [%[a], #168]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #168]\n\t" + /* a[i+46] += m[46] * mu */ + "ldr r7, [%[m], #184]\n\t" + "ldr r10, [%[a], #184]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + 
"adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+43] += m[43] * mu\n\t" - "ldr r7, [%[m], #172]\n\t" - "ldr r9, [%[a], #172]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #184]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+47] += m[47] * mu */ + "ldr r7, [%[m], #188]\n\t" + "ldr r10, [%[a], #188]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #172]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #188]\n\t" "adc r4, r4, #0\n\t" - "# a[i+44] += m[44] * mu\n\t" - "ldr r7, [%[m], #176]\n\t" - "ldr r9, [%[a], #176]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #176]\n\t" + /* a[i+48] += m[48] * mu */ + "ldr r7, [%[m], #192]\n\t" + "ldr r10, [%[a], #192]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+45] += m[45] * mu\n\t" - "ldr r7, [%[m], #180]\n\t" - "ldr r9, [%[a], #180]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #192]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+49] += m[49] * mu */ + "ldr r7, [%[m], #196]\n\t" + "ldr r10, [%[a], #196]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #180]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #196]\n\t" "adc r4, r4, #0\n\t" - "# a[i+46] += m[46] * mu\n\t" - "ldr r7, [%[m], #184]\n\t" - "ldr r9, [%[a], #184]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #184]\n\t" + /* a[i+50] += m[50] * mu */ + "ldr r7, [%[m], #200]\n\t" + "ldr r10, [%[a], #200]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+47] += m[47] * mu\n\t" - "ldr r7, [%[m], #188]\n\t" - "ldr r9, [%[a], #188]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #200]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+51] += m[51] * mu */ + "ldr r7, [%[m], #204]\n\t" + "ldr r10, [%[a], #204]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #188]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #204]\n\t" "adc r4, r4, #0\n\t" - "# a[i+48] += m[48] * mu\n\t" - "ldr r7, [%[m], #192]\n\t" - "ldr r9, [%[a], #192]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #192]\n\t" + /* a[i+52] += m[52] * mu 
*/ + "ldr r7, [%[m], #208]\n\t" + "ldr r10, [%[a], #208]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+49] += m[49] * mu\n\t" - "ldr r7, [%[m], #196]\n\t" - "ldr r9, [%[a], #196]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #208]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+53] += m[53] * mu */ + "ldr r7, [%[m], #212]\n\t" + "ldr r10, [%[a], #212]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #196]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #212]\n\t" "adc r4, r4, #0\n\t" - "# a[i+50] += m[50] * mu\n\t" - "ldr r7, [%[m], #200]\n\t" - "ldr r9, [%[a], 
#200]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #200]\n\t" + /* a[i+54] += m[54] * mu */ + "ldr r7, [%[m], #216]\n\t" + "ldr r10, [%[a], #216]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+51] += m[51] * mu\n\t" - "ldr r7, [%[m], #204]\n\t" - "ldr r9, [%[a], #204]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #216]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+55] += m[55] * mu */ + "ldr r7, [%[m], #220]\n\t" + "ldr r10, [%[a], #220]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #204]\n\t" 
+#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #220]\n\t" "adc r4, r4, #0\n\t" - "# a[i+52] += m[52] * mu\n\t" - "ldr r7, [%[m], #208]\n\t" - "ldr r9, [%[a], #208]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #208]\n\t" + /* a[i+56] += m[56] * mu */ + "ldr r7, [%[m], #224]\n\t" + "ldr r10, [%[a], #224]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+53] += m[53] * mu\n\t" - "ldr r7, [%[m], #212]\n\t" - "ldr r9, [%[a], #212]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #224]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+57] += m[57] * mu */ + "ldr r7, [%[m], #228]\n\t" + "ldr r10, [%[a], #228]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, 
r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #212]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #228]\n\t" "adc r4, r4, #0\n\t" - "# a[i+54] += m[54] * mu\n\t" - "ldr r7, [%[m], #216]\n\t" - "ldr r9, [%[a], #216]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #216]\n\t" + /* a[i+58] += m[58] * mu */ + "ldr r7, [%[m], #232]\n\t" + "ldr r10, [%[a], #232]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+55] += m[55] * mu\n\t" - "ldr r7, [%[m], #220]\n\t" - "ldr r9, [%[a], #220]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #232]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+59] += m[59] * mu */ + "ldr r7, [%[m], #236]\n\t" + "ldr r10, [%[a], #236]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + 
"adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #220]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #236]\n\t" "adc r4, r4, #0\n\t" - "# a[i+56] += m[56] * mu\n\t" - "ldr r7, [%[m], #224]\n\t" - "ldr r9, [%[a], #224]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #224]\n\t" + /* a[i+60] += m[60] * mu */ + "ldr r7, [%[m], #240]\n\t" + "ldr r10, [%[a], #240]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+57] += m[57] * mu\n\t" - "ldr r7, [%[m], #228]\n\t" - "ldr r9, [%[a], #228]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #240]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+61] += m[61] * mu */ + "ldr r7, [%[m], #244]\n\t" + "ldr r10, [%[a], #244]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #228]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #244]\n\t" "adc r4, r4, #0\n\t" - "# a[i+58] += m[58] * mu\n\t" - "ldr r7, [%[m], #232]\n\t" - "ldr r9, [%[a], #232]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #232]\n\t" + /* a[i+62] += m[62] * mu */ + "ldr r7, [%[m], #248]\n\t" + "ldr r10, [%[a], #248]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+59] += m[59] * mu\n\t" - "ldr r7, [%[m], #236]\n\t" - "ldr r9, [%[a], #236]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #248]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+63] += m[63] * mu */ + "ldr r7, [%[m], #252]\n\t" + "ldr r10, [%[a], #252]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #236]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #252]\n\t" "adc r4, r4, #0\n\t" - "# a[i+60] += m[60] * mu\n\t" - "ldr r7, [%[m], #240]\n\t" - "ldr r9, [%[a], #240]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #240]\n\t" + /* a[i+64] += m[64] * mu */ + "ldr r7, [%[m], #256]\n\t" + "ldr r10, [%[a], #256]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+61] += m[61] * mu\n\t" - "ldr r7, [%[m], #244]\n\t" - "ldr r9, [%[a], #244]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #256]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+65] += m[65] * mu 
*/ + "ldr r7, [%[m], #260]\n\t" + "ldr r10, [%[a], #260]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #244]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #260]\n\t" "adc r4, r4, #0\n\t" - "# a[i+62] += m[62] * mu\n\t" - "ldr r7, [%[m], #248]\n\t" - "ldr r9, [%[a], #248]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #248]\n\t" + /* a[i+66] += m[66] * mu */ + "ldr r7, [%[m], #264]\n\t" + "ldr r10, [%[a], #264]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+63] += m[63] * mu\n\t" - "ldr r7, [%[m], #252]\n\t" - "ldr r9, [%[a], #252]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, 
r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #264]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+67] += m[67] * mu */ + "ldr r7, [%[m], #268]\n\t" + "ldr r10, [%[a], #268]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #252]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #268]\n\t" "adc r4, r4, #0\n\t" - "# a[i+64] += m[64] * mu\n\t" - "ldr r7, [%[m], #256]\n\t" - "ldr r9, [%[a], #256]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #256]\n\t" + /* a[i+68] += m[68] * mu */ + "ldr r7, [%[m], #272]\n\t" + "ldr r10, [%[a], #272]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+65] += m[65] * mu\n\t" - "ldr r7, [%[m], #260]\n\t" - "ldr r9, [%[a], #260]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #272]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+69] += m[69] * mu */ + "ldr r7, [%[m], #276]\n\t" + "ldr r10, [%[a], #276]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #260]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #276]\n\t" "adc r4, r4, #0\n\t" - "# a[i+66] += m[66] * mu\n\t" - "ldr r7, [%[m], #264]\n\t" - "ldr r9, [%[a], #264]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #264]\n\t" + /* a[i+70] += m[70] * mu */ + "ldr r7, [%[m], #280]\n\t" + "ldr r10, [%[a], #280]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + 
"adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+67] += m[67] * mu\n\t" - "ldr r7, [%[m], #268]\n\t" - "ldr r9, [%[a], #268]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #280]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+71] += m[71] * mu */ + "ldr r7, [%[m], #284]\n\t" + "ldr r10, [%[a], #284]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #268]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #284]\n\t" "adc r4, r4, #0\n\t" - "# a[i+68] += m[68] * mu\n\t" - "ldr r7, [%[m], #272]\n\t" - "ldr r9, [%[a], #272]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #272]\n\t" + /* a[i+72] += m[72] * mu */ + "ldr r7, [%[m], #288]\n\t" + "ldr r10, [%[a], #288]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+69] += m[69] * mu\n\t" - "ldr r7, [%[m], #276]\n\t" - "ldr r9, [%[a], #276]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #288]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+73] += m[73] * mu */ + "ldr r7, [%[m], #292]\n\t" + "ldr r10, [%[a], #292]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #276]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #292]\n\t" "adc r4, r4, #0\n\t" - "# a[i+70] += m[70] * mu\n\t" - "ldr r7, [%[m], #280]\n\t" - "ldr r9, [%[a], #280]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #280]\n\t" + /* a[i+74] += m[74] * mu */ + "ldr r7, [%[m], #296]\n\t" + "ldr r10, [%[a], #296]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+71] += m[71] * mu\n\t" - "ldr r7, [%[m], #284]\n\t" - "ldr r9, [%[a], #284]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #296]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+75] += m[75] * mu */ + "ldr r7, [%[m], #300]\n\t" + "ldr r10, [%[a], #300]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #284]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #300]\n\t" "adc r4, r4, #0\n\t" - "# a[i+72] += m[72] * mu\n\t" - "ldr r7, [%[m], #288]\n\t" - "ldr r9, [%[a], #288]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #288]\n\t" + /* a[i+76] += m[76] * mu 
*/ + "ldr r7, [%[m], #304]\n\t" + "ldr r10, [%[a], #304]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+73] += m[73] * mu\n\t" - "ldr r7, [%[m], #292]\n\t" - "ldr r9, [%[a], #292]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #304]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+77] += m[77] * mu */ + "ldr r7, [%[m], #308]\n\t" + "ldr r10, [%[a], #308]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #292]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #308]\n\t" "adc r4, r4, #0\n\t" - "# a[i+74] += m[74] * mu\n\t" - "ldr r7, [%[m], #296]\n\t" - "ldr r9, [%[a], 
#296]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #296]\n\t" + /* a[i+78] += m[78] * mu */ + "ldr r7, [%[m], #312]\n\t" + "ldr r10, [%[a], #312]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+75] += m[75] * mu\n\t" - "ldr r7, [%[m], #300]\n\t" - "ldr r9, [%[a], #300]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #312]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+79] += m[79] * mu */ + "ldr r7, [%[m], #316]\n\t" + "ldr r10, [%[a], #316]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #300]\n\t" 
+#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #316]\n\t" "adc r4, r4, #0\n\t" - "# a[i+76] += m[76] * mu\n\t" - "ldr r7, [%[m], #304]\n\t" - "ldr r9, [%[a], #304]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #304]\n\t" + /* a[i+80] += m[80] * mu */ + "ldr r7, [%[m], #320]\n\t" + "ldr r10, [%[a], #320]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+77] += m[77] * mu\n\t" - "ldr r7, [%[m], #308]\n\t" - "ldr r9, [%[a], #308]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #320]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+81] += m[81] * mu */ + "ldr r7, [%[m], #324]\n\t" + "ldr r10, [%[a], #324]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, 
r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #308]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #324]\n\t" "adc r4, r4, #0\n\t" - "# a[i+78] += m[78] * mu\n\t" - "ldr r7, [%[m], #312]\n\t" - "ldr r9, [%[a], #312]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #312]\n\t" + /* a[i+82] += m[82] * mu */ + "ldr r7, [%[m], #328]\n\t" + "ldr r10, [%[a], #328]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+79] += m[79] * mu\n\t" - "ldr r7, [%[m], #316]\n\t" - "ldr r9, [%[a], #316]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #328]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+83] += m[83] * mu */ + "ldr r7, [%[m], #332]\n\t" + "ldr r10, [%[a], #332]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + 
"adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #316]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #332]\n\t" "adc r4, r4, #0\n\t" - "# a[i+80] += m[80] * mu\n\t" - "ldr r7, [%[m], #320]\n\t" - "ldr r9, [%[a], #320]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #320]\n\t" + /* a[i+84] += m[84] * mu */ + "ldr r7, [%[m], #336]\n\t" + "ldr r10, [%[a], #336]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+81] += m[81] * mu\n\t" - "ldr r7, [%[m], #324]\n\t" - "ldr r9, [%[a], #324]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #336]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+85] += m[85] * mu */ + "ldr r7, [%[m], #340]\n\t" + "ldr r10, [%[a], #340]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #324]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #340]\n\t" "adc r4, r4, #0\n\t" - "# a[i+82] += m[82] * mu\n\t" - "ldr r7, [%[m], #328]\n\t" - "ldr r9, [%[a], #328]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #328]\n\t" + /* a[i+86] += m[86] * mu */ + "ldr r7, [%[m], #344]\n\t" + "ldr r10, [%[a], #344]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+83] += m[83] * mu\n\t" - "ldr r7, [%[m], #332]\n\t" - "ldr r9, [%[a], #332]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #344]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+87] += m[87] * mu */ + "ldr r7, [%[m], #348]\n\t" + "ldr r10, [%[a], #348]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #332]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #348]\n\t" "adc r4, r4, #0\n\t" - "# a[i+84] += m[84] * mu\n\t" - "ldr r7, [%[m], #336]\n\t" - "ldr r9, [%[a], #336]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #336]\n\t" + /* a[i+88] += m[88] * mu */ + "ldr r7, [%[m], #352]\n\t" + "ldr r10, [%[a], #352]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+85] += m[85] * mu\n\t" - "ldr r7, [%[m], #340]\n\t" - "ldr r9, [%[a], #340]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #352]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+89] += m[89] * mu 
*/ + "ldr r7, [%[m], #356]\n\t" + "ldr r10, [%[a], #356]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #340]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #356]\n\t" "adc r4, r4, #0\n\t" - "# a[i+86] += m[86] * mu\n\t" - "ldr r7, [%[m], #344]\n\t" - "ldr r9, [%[a], #344]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #344]\n\t" + /* a[i+90] += m[90] * mu */ + "ldr r7, [%[m], #360]\n\t" + "ldr r10, [%[a], #360]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+87] += m[87] * mu\n\t" - "ldr r7, [%[m], #348]\n\t" - "ldr r9, [%[a], #348]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, 
r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #360]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+91] += m[91] * mu */ + "ldr r7, [%[m], #364]\n\t" + "ldr r10, [%[a], #364]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #348]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #364]\n\t" "adc r4, r4, #0\n\t" - "# a[i+88] += m[88] * mu\n\t" - "ldr r7, [%[m], #352]\n\t" - "ldr r9, [%[a], #352]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #352]\n\t" + /* a[i+92] += m[92] * mu */ + "ldr r7, [%[m], #368]\n\t" + "ldr r10, [%[a], #368]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+89] += m[89] * mu\n\t" - "ldr r7, [%[m], #356]\n\t" - "ldr r9, [%[a], #356]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #368]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+93] += m[93] * mu */ + "ldr r7, [%[m], #372]\n\t" + "ldr r10, [%[a], #372]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #356]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #372]\n\t" "adc r4, r4, #0\n\t" - "# a[i+90] += m[90] * mu\n\t" - "ldr r7, [%[m], #360]\n\t" - "ldr r9, [%[a], #360]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #360]\n\t" + /* a[i+94] += m[94] * mu */ + "ldr r7, [%[m], #376]\n\t" + "ldr r10, [%[a], #376]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + 
"adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+91] += m[91] * mu\n\t" - "ldr r7, [%[m], #364]\n\t" - "ldr r9, [%[a], #364]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #376]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+95] += m[95] * mu */ + "ldr r7, [%[m], #380]\n\t" + "ldr r10, [%[a], #380]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #364]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #380]\n\t" "adc r4, r4, #0\n\t" - "# a[i+92] += m[92] * mu\n\t" - "ldr r7, [%[m], #368]\n\t" - "ldr r9, [%[a], #368]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #368]\n\t" + /* a[i+96] += m[96] * mu */ + "ldr r7, [%[m], #384]\n\t" + "ldr r10, [%[a], #384]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+93] += m[93] * mu\n\t" - "ldr r7, [%[m], #372]\n\t" - "ldr r9, [%[a], #372]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #384]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+97] += m[97] * mu */ + "ldr r7, [%[m], #388]\n\t" + "ldr r10, [%[a], #388]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #372]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #388]\n\t" "adc r4, r4, #0\n\t" - "# a[i+94] += m[94] * mu\n\t" - "ldr r7, [%[m], #376]\n\t" - "ldr r9, [%[a], #376]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #376]\n\t" + /* a[i+98] += m[98] * mu */ + "ldr r7, [%[m], #392]\n\t" + "ldr r10, [%[a], #392]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, 
#16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+95] += m[95] * mu\n\t" - "ldr r7, [%[m], #380]\n\t" - "ldr r9, [%[a], #380]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #392]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+99] += m[99] * mu */ + "ldr r7, [%[m], #396]\n\t" + "ldr r10, [%[a], #396]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #380]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #396]\n\t" "adc r4, r4, #0\n\t" - "# a[i+96] += m[96] * mu\n\t" - "ldr r7, [%[m], #384]\n\t" - "ldr r9, [%[a], #384]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #384]\n\t" + /* a[i+100] += m[100] * 
mu */ + "ldr r7, [%[m], #400]\n\t" + "ldr r10, [%[a], #400]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+97] += m[97] * mu\n\t" - "ldr r7, [%[m], #388]\n\t" - "ldr r9, [%[a], #388]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #400]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+101] += m[101] * mu */ + "ldr r7, [%[m], #404]\n\t" + "ldr r10, [%[a], #404]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #388]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #404]\n\t" "adc r4, r4, #0\n\t" - "# a[i+98] += m[98] * mu\n\t" - "ldr r7, [%[m], #392]\n\t" - "ldr r9, 
[%[a], #392]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #392]\n\t" + /* a[i+102] += m[102] * mu */ + "ldr r7, [%[m], #408]\n\t" + "ldr r10, [%[a], #408]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+99] += m[99] * mu\n\t" - "ldr r7, [%[m], #396]\n\t" - "ldr r9, [%[a], #396]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #408]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+103] += m[103] * mu */ + "ldr r7, [%[m], #412]\n\t" + "ldr r10, [%[a], #412]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], 
#396]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #412]\n\t" "adc r4, r4, #0\n\t" - "# a[i+100] += m[100] * mu\n\t" - "ldr r7, [%[m], #400]\n\t" - "ldr r9, [%[a], #400]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #400]\n\t" + /* a[i+104] += m[104] * mu */ + "ldr r7, [%[m], #416]\n\t" + "ldr r10, [%[a], #416]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+101] += m[101] * mu\n\t" - "ldr r7, [%[m], #404]\n\t" - "ldr r9, [%[a], #404]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #416]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+105] += m[105] * mu */ + "ldr r7, [%[m], #420]\n\t" + "ldr r10, [%[a], #420]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #404]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #420]\n\t" "adc r4, r4, #0\n\t" - "# a[i+102] += m[102] * mu\n\t" - "ldr r7, [%[m], #408]\n\t" - "ldr r9, [%[a], #408]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #408]\n\t" + /* a[i+106] += m[106] * mu */ + "ldr r7, [%[m], #424]\n\t" + "ldr r10, [%[a], #424]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+103] += m[103] * mu\n\t" - "ldr r7, [%[m], #412]\n\t" - "ldr r9, [%[a], #412]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #424]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+107] += m[107] * mu */ + "ldr r7, [%[m], #428]\n\t" + "ldr r10, [%[a], #428]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + 
"mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #412]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #428]\n\t" "adc r4, r4, #0\n\t" - "# a[i+104] += m[104] * mu\n\t" - "ldr r7, [%[m], #416]\n\t" - "ldr r9, [%[a], #416]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #416]\n\t" + /* a[i+108] += m[108] * mu */ + "ldr r7, [%[m], #432]\n\t" + "ldr r10, [%[a], #432]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+105] += m[105] * mu\n\t" - "ldr r7, [%[m], #420]\n\t" - "ldr r9, [%[a], #420]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #432]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+109] += m[109] * mu */ + "ldr r7, [%[m], #436]\n\t" + "ldr r10, [%[a], #436]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #420]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #436]\n\t" "adc r4, r4, #0\n\t" - "# a[i+106] += m[106] * mu\n\t" - "ldr r7, [%[m], #424]\n\t" - "ldr r9, [%[a], #424]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #424]\n\t" + /* a[i+110] += m[110] * mu */ + "ldr r7, [%[m], #440]\n\t" + "ldr r10, [%[a], #440]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+107] += m[107] * mu\n\t" - "ldr r7, [%[m], #428]\n\t" - "ldr r9, [%[a], #428]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #440]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+111] += m[111] * mu */ + "ldr r7, [%[m], #444]\n\t" + "ldr r10, [%[a], #444]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #428]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #444]\n\t" "adc r4, r4, #0\n\t" - "# a[i+108] += m[108] * mu\n\t" - "ldr r7, [%[m], #432]\n\t" - "ldr r9, [%[a], #432]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #432]\n\t" + /* a[i+112] += m[112] * mu */ + "ldr r7, [%[m], #448]\n\t" + "ldr r10, [%[a], #448]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+109] += m[109] * mu\n\t" - "ldr r7, [%[m], #436]\n\t" - "ldr r9, [%[a], #436]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #448]\n\t" + "adc 
r5, r5, #0\n\t" + /* a[i+113] += m[113] * mu */ + "ldr r7, [%[m], #452]\n\t" + "ldr r10, [%[a], #452]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #436]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #452]\n\t" "adc r4, r4, #0\n\t" - "# a[i+110] += m[110] * mu\n\t" - "ldr r7, [%[m], #440]\n\t" - "ldr r9, [%[a], #440]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #440]\n\t" + /* a[i+114] += m[114] * mu */ + "ldr r7, [%[m], #456]\n\t" + "ldr r10, [%[a], #456]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+111] += m[111] * mu\n\t" - "ldr r7, [%[m], #444]\n\t" - "ldr r9, [%[a], #444]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" 
+#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #456]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+115] += m[115] * mu */ + "ldr r7, [%[m], #460]\n\t" + "ldr r10, [%[a], #460]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #444]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #460]\n\t" "adc r4, r4, #0\n\t" - "# a[i+112] += m[112] * mu\n\t" - "ldr r7, [%[m], #448]\n\t" - "ldr r9, [%[a], #448]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #448]\n\t" + /* a[i+116] += m[116] * mu */ + "ldr r7, [%[m], #464]\n\t" + "ldr r10, [%[a], #464]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+113] += m[113] * mu\n\t" - "ldr r7, [%[m], #452]\n\t" - 
"ldr r9, [%[a], #452]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #464]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+117] += m[117] * mu */ + "ldr r7, [%[m], #468]\n\t" + "ldr r10, [%[a], #468]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #452]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #468]\n\t" "adc r4, r4, #0\n\t" - "# a[i+114] += m[114] * mu\n\t" - "ldr r7, [%[m], #456]\n\t" - "ldr r9, [%[a], #456]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #456]\n\t" + /* a[i+118] += m[118] * mu */ + "ldr r7, [%[m], #472]\n\t" + "ldr r10, [%[a], #472]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+115] += m[115] * mu\n\t" - "ldr r7, [%[m], #460]\n\t" - "ldr r9, [%[a], #460]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #472]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+119] += m[119] * mu */ + "ldr r7, [%[m], #476]\n\t" + "ldr r10, [%[a], #476]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #460]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #476]\n\t" "adc r4, r4, #0\n\t" - "# a[i+116] += m[116] * mu\n\t" - "ldr r7, [%[m], #464]\n\t" - "ldr r9, [%[a], #464]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #464]\n\t" + /* a[i+120] += m[120] * mu */ + "ldr r7, [%[m], #480]\n\t" + "ldr r10, [%[a], #480]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, 
r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+117] += m[117] * mu\n\t" - "ldr r7, [%[m], #468]\n\t" - "ldr r9, [%[a], #468]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #480]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+121] += m[121] * mu */ + "ldr r7, [%[m], #484]\n\t" + "ldr r10, [%[a], #484]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #468]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #484]\n\t" "adc r4, r4, #0\n\t" - "# a[i+118] += m[118] * mu\n\t" - "ldr r7, [%[m], #472]\n\t" - "ldr r9, [%[a], #472]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #472]\n\t" + /* a[i+122] += m[122] * mu */ + "ldr r7, [%[m], #488]\n\t" + "ldr r10, [%[a], #488]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+119] += m[119] * mu\n\t" - "ldr r7, [%[m], #476]\n\t" - "ldr r9, [%[a], #476]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #488]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+123] += m[123] * mu */ + "ldr r7, [%[m], #492]\n\t" + "ldr r10, [%[a], #492]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #476]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #492]\n\t" "adc r4, r4, #0\n\t" - "# a[i+120] += m[120] * mu\n\t" - "ldr r7, [%[m], #480]\n\t" - "ldr r9, [%[a], #480]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" 
- "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #480]\n\t" + /* a[i+124] += m[124] * mu */ + "ldr r7, [%[m], #496]\n\t" + "ldr r10, [%[a], #496]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+121] += m[121] * mu\n\t" - "ldr r7, [%[m], #484]\n\t" - "ldr r9, [%[a], #484]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #496]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+125] += m[125] * mu */ + "ldr r7, [%[m], #500]\n\t" + "ldr r10, [%[a], #500]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #484]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], 
#500]\n\t" "adc r4, r4, #0\n\t" - "# a[i+122] += m[122] * mu\n\t" - "ldr r7, [%[m], #488]\n\t" - "ldr r9, [%[a], #488]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #488]\n\t" + /* a[i+126] += m[126] * mu */ + "ldr r7, [%[m], #504]\n\t" + "ldr r10, [%[a], #504]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+123] += m[123] * mu\n\t" - "ldr r7, [%[m], #492]\n\t" - "ldr r9, [%[a], #492]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #492]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+124] += m[124] * mu\n\t" - "ldr r7, [%[m], #496]\n\t" - "ldr r9, [%[a], #496]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #496]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #504]\n\t" "adc r5, r5, #0\n\t" - "# a[i+125] += m[125] * mu\n\t" - "ldr r7, [%[m], #500]\n\t" - "ldr r9, [%[a], #500]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #500]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+126] += m[126] * mu\n\t" - "ldr r7, [%[m], #504]\n\t" - "ldr r9, [%[a], #504]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, 
[%[a], #504]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+127] += m[127] * mu\n\t" + /* a[i+127] += m[127] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r7, [%[m], #508]\n\t" - "ldr r9, [%[a], #508]\n\t" +#else + "ldr r11, [%[m], #508]\n\t" +#endif + "ldr r10, [%[a], #508]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #508]\n\t" - "ldr r9, [%[a], #512]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #512]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #508]\n\t" + "ldr r10, [%[a], #512]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #512]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #512\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #0x200\n\t" + "blt 
L_sp_4096_mont_reduce_128_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); + sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -23565,31 +55995,30 @@ SP_NOINLINE static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #512\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x200\n\t" + "\n" + "L_sp_4096_sub_128_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_4096_sub_128_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -23599,246 +56028,243 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, 
r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" 
"sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, 
r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" 
"sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, 
r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. 
@@ -23850,57 +56276,184 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, */ static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], 
%[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_4096_word_128_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_4096_word_128_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + 
"lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -24013,1454 +56566,1452 @@ static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m) */ static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #508\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r4, #0x1\n\t" + "lsl r4, r4, #8\n\t" + "add r4, r4, #0xfc\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #508]\n\t" - "ldr r5, [%[b], #508]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #504]\n\t" - "ldr r5, [%[b], #504]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #500]\n\t" - "ldr r5, [%[b], #500]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #496]\n\t" - "ldr r5, [%[b], #496]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - 
"movne r3, r7\n\t" - "ldr r4, [%[a], #492]\n\t" - "ldr r5, [%[b], #492]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #488]\n\t" - "ldr r5, [%[b], #488]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #484]\n\t" - "ldr r5, [%[b], #484]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #480]\n\t" - "ldr r5, [%[b], #480]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #476]\n\t" - "ldr r5, [%[b], #476]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #472]\n\t" - "ldr r5, [%[b], #472]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #468]\n\t" - "ldr r5, [%[b], #468]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #464]\n\t" - "ldr r5, [%[b], #464]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #460]\n\t" - "ldr r5, [%[b], 
#460]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #456]\n\t" - "ldr r5, [%[b], #456]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #452]\n\t" - "ldr r5, [%[b], #452]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #448]\n\t" - "ldr r5, [%[b], #448]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #444]\n\t" - "ldr r5, [%[b], #444]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #440]\n\t" - "ldr r5, [%[b], #440]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #436]\n\t" - "ldr r5, [%[b], #436]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #432]\n\t" - "ldr r5, [%[b], #432]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #428]\n\t" - "ldr r5, [%[b], #428]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, 
r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #424]\n\t" - "ldr r5, [%[b], #424]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #420]\n\t" - "ldr r5, [%[b], #420]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #416]\n\t" - "ldr r5, [%[b], #416]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #412]\n\t" - "ldr r5, [%[b], #412]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #408]\n\t" - "ldr r5, [%[b], #408]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #404]\n\t" - "ldr r5, [%[b], #404]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #400]\n\t" - "ldr r5, [%[b], #400]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #396]\n\t" - "ldr r5, [%[b], #396]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - 
"movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #392]\n\t" - "ldr r5, [%[b], #392]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #388]\n\t" - "ldr r5, [%[b], #388]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #384]\n\t" - "ldr r5, [%[b], #384]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #380]\n\t" - "ldr r5, [%[b], #380]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #376]\n\t" - "ldr r5, [%[b], #376]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #372]\n\t" - "ldr r5, [%[b], #372]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #368]\n\t" - "ldr r5, [%[b], #368]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #364]\n\t" - "ldr r5, [%[b], #364]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, 
[%[a], #360]\n\t" - "ldr r5, [%[b], #360]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #356]\n\t" - "ldr r5, [%[b], #356]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #352]\n\t" - "ldr r5, [%[b], #352]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #348]\n\t" - "ldr r5, [%[b], #348]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #344]\n\t" - "ldr r5, [%[b], #344]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #340]\n\t" - "ldr r5, [%[b], #340]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #336]\n\t" - "ldr r5, [%[b], #336]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #332]\n\t" - "ldr r5, [%[b], #332]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #328]\n\t" - "ldr r5, [%[b], #328]\n\t" - "and r4, r4, r3\n\t" - 
"and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #324]\n\t" - "ldr r5, [%[b], #324]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #320]\n\t" - "ldr r5, [%[b], #320]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #316]\n\t" - "ldr r5, [%[b], #316]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #312]\n\t" - "ldr r5, [%[b], #312]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #308]\n\t" - "ldr r5, [%[b], #308]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #304]\n\t" - "ldr r5, [%[b], #304]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #300]\n\t" - "ldr r5, [%[b], #300]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #296]\n\t" - "ldr r5, [%[b], #296]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi 
%[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #292]\n\t" - "ldr r5, [%[b], #292]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #288]\n\t" - "ldr r5, [%[b], #288]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #284]\n\t" - "ldr r5, [%[b], #284]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #280]\n\t" - "ldr r5, [%[b], #280]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #276]\n\t" - "ldr r5, [%[b], #276]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #272]\n\t" - "ldr r5, [%[b], #272]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #268]\n\t" - "ldr r5, [%[b], #268]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #264]\n\t" - "ldr r5, [%[b], #264]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - 
"movne r3, r7\n\t" - "ldr r4, [%[a], #260]\n\t" - "ldr r5, [%[b], #260]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #256]\n\t" - "ldr r5, [%[b], #256]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #252]\n\t" - "ldr r5, [%[b], #252]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[b], #248]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #244]\n\t" - "ldr r5, [%[b], #244]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[b], #240]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #236]\n\t" - "ldr r5, [%[b], #236]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[b], #232]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #228]\n\t" - "ldr r5, [%[b], 
#228]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[b], #224]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #220]\n\t" - "ldr r5, [%[b], #220]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[b], #216]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #212]\n\t" - "ldr r5, [%[b], #212]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[b], #208]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #204]\n\t" - "ldr r5, [%[b], #204]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[b], #200]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #196]\n\t" - "ldr r5, [%[b], #196]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, 
r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[b], #192]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #188]\n\t" - "ldr r5, [%[b], #188]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[b], #184]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #180]\n\t" - "ldr r5, [%[b], #180]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[b], #176]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #172]\n\t" - "ldr r5, [%[b], #172]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[b], #168]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #164]\n\t" - "ldr r5, [%[b], #164]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - 
"movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[b], #160]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #156]\n\t" - "ldr r5, [%[b], #156]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[b], #152]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #148]\n\t" - "ldr r5, [%[b], #148]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[b], #144]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #140]\n\t" - "ldr r5, [%[b], #140]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[b], #136]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #132]\n\t" - "ldr r5, [%[b], #132]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, 
[%[a], #128]\n\t" - "ldr r5, [%[b], #128]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - 
"and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - 
"it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], 
#28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" - "it hi\n\t" - "movhi %[r], %[one]\n\t" - "it lo\n\t" - "movlo %[r], r3\n\t" - "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", 
"r6", "r7" - ); + "mov r4, #0x1fc\n\t" #endif - - return r; + "\n" + "L_sp_4096_cmp_128_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_4096_cmp_128_words_%=\n\t" + "eor r2, r2, r3\n\t" +#else + "ldr r12, [%[a], #508]\n\t" + "ldr lr, [%[b], #508]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #504]\n\t" + "ldr lr, [%[b], #504]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #500]\n\t" + "ldr lr, [%[b], #500]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #496]\n\t" + "ldr lr, [%[b], #496]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #492]\n\t" + "ldr lr, [%[b], #492]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #488]\n\t" + "ldr lr, [%[b], #488]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #484]\n\t" + "ldr lr, [%[b], #484]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it 
hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #480]\n\t" + "ldr lr, [%[b], #480]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #476]\n\t" + "ldr lr, [%[b], #476]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #472]\n\t" + "ldr lr, [%[b], #472]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #468]\n\t" + "ldr lr, [%[b], #468]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #464]\n\t" + "ldr lr, [%[b], #464]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #460]\n\t" + "ldr lr, [%[b], #460]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #456]\n\t" + "ldr lr, [%[b], #456]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #452]\n\t" + "ldr lr, [%[b], #452]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, 
r5\n\t" + "ldr r12, [%[a], #448]\n\t" + "ldr lr, [%[b], #448]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #444]\n\t" + "ldr lr, [%[b], #444]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #440]\n\t" + "ldr lr, [%[b], #440]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #436]\n\t" + "ldr lr, [%[b], #436]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #432]\n\t" + "ldr lr, [%[b], #432]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #428]\n\t" + "ldr lr, [%[b], #428]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #424]\n\t" + "ldr lr, [%[b], #424]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #420]\n\t" + "ldr lr, [%[b], #420]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #416]\n\t" + "ldr lr, [%[b], #416]\n\t" + "and r12, r12, r3\n\t" + 
"and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #412]\n\t" + "ldr lr, [%[b], #412]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #408]\n\t" + "ldr lr, [%[b], #408]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #404]\n\t" + "ldr lr, [%[b], #404]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #400]\n\t" + "ldr lr, [%[b], #400]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #396]\n\t" + "ldr lr, [%[b], #396]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #392]\n\t" + "ldr lr, [%[b], #392]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #388]\n\t" + "ldr lr, [%[b], #388]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #384]\n\t" + "ldr lr, [%[b], #384]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" 
+ "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #380]\n\t" + "ldr lr, [%[b], #380]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #376]\n\t" + "ldr lr, [%[b], #376]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #372]\n\t" + "ldr lr, [%[b], #372]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #368]\n\t" + "ldr lr, [%[b], #368]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #364]\n\t" + "ldr lr, [%[b], #364]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #360]\n\t" + "ldr lr, [%[b], #360]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #356]\n\t" + "ldr lr, [%[b], #356]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #352]\n\t" + "ldr lr, [%[b], #352]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #348]\n\t" + "ldr 
lr, [%[b], #348]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #344]\n\t" + "ldr lr, [%[b], #344]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #340]\n\t" + "ldr lr, [%[b], #340]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #336]\n\t" + "ldr lr, [%[b], #336]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #332]\n\t" + "ldr lr, [%[b], #332]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #328]\n\t" + "ldr lr, [%[b], #328]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #324]\n\t" + "ldr lr, [%[b], #324]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #320]\n\t" + "ldr lr, [%[b], #320]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #316]\n\t" + "ldr lr, [%[b], #316]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" 
+ "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #312]\n\t" + "ldr lr, [%[b], #312]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #308]\n\t" + "ldr lr, [%[b], #308]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #304]\n\t" + "ldr lr, [%[b], #304]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #300]\n\t" + "ldr lr, [%[b], #300]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #296]\n\t" + "ldr lr, [%[b], #296]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #292]\n\t" + "ldr lr, [%[b], #292]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #288]\n\t" + "ldr lr, [%[b], #288]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #284]\n\t" + "ldr lr, [%[b], #284]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne 
r3, r5\n\t" + "ldr r12, [%[a], #280]\n\t" + "ldr lr, [%[b], #280]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #276]\n\t" + "ldr lr, [%[b], #276]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #272]\n\t" + "ldr lr, [%[b], #272]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #268]\n\t" + "ldr lr, [%[b], #268]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #264]\n\t" + "ldr lr, [%[b], #264]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #260]\n\t" + "ldr lr, [%[b], #260]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #256]\n\t" + "ldr lr, [%[b], #256]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #252]\n\t" + "ldr lr, [%[b], #252]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #248]\n\t" + "ldr lr, [%[b], #248]\n\t" + "and r12, r12, r3\n\t" 
+ "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #244]\n\t" + "ldr lr, [%[b], #244]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #240]\n\t" + "ldr lr, [%[b], #240]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #236]\n\t" + "ldr lr, [%[b], #236]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #232]\n\t" + "ldr lr, [%[b], #232]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #228]\n\t" + "ldr lr, [%[b], #228]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #224]\n\t" + "ldr lr, [%[b], #224]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #220]\n\t" + "ldr lr, [%[b], #220]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #216]\n\t" + "ldr lr, [%[b], #216]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it 
lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #212]\n\t" + "ldr lr, [%[b], #212]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #208]\n\t" + "ldr lr, [%[b], #208]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #204]\n\t" + "ldr lr, [%[b], #204]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #200]\n\t" + "ldr lr, [%[b], #200]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #196]\n\t" + "ldr lr, [%[b], #196]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #192]\n\t" + "ldr lr, [%[b], #192]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #188]\n\t" + "ldr lr, [%[b], #188]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #184]\n\t" + "ldr lr, [%[b], #184]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #180]\n\t" + 
"ldr lr, [%[b], #180]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #176]\n\t" + "ldr lr, [%[b], #176]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #172]\n\t" + "ldr lr, [%[b], #172]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #168]\n\t" + "ldr lr, [%[b], #168]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #164]\n\t" + "ldr lr, [%[b], #164]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #160]\n\t" + "ldr lr, [%[b], #160]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #156]\n\t" + "ldr lr, [%[b], #156]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #152]\n\t" + "ldr lr, [%[b], #152]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #148]\n\t" + "ldr lr, [%[b], #148]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, 
lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #144]\n\t" + "ldr lr, [%[b], #144]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #140]\n\t" + "ldr lr, [%[b], #140]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #136]\n\t" + "ldr lr, [%[b], #136]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #132]\n\t" + "ldr lr, [%[b], #132]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #128]\n\t" + "ldr lr, [%[b], #128]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + 
"movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" + "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + 
"and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, 
r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, 
r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" + "it hi\n\t" + "movhi r2, r6\n\t" + "it lo\n\t" + "movlo r2, r3\n\t" + "it ne\n\t" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" + ); + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -25991,35 +58542,33 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" + "mov lr, #0\n\t" "mov r6, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_4096_cond_add_64_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r7, r7\n\t" - "str r4, [%[r], r6]\n\t" - "add r6, r6, #4\n\t" - "cmp r6, #256\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0x100\n\t" + "blt L_sp_4096_cond_add_64_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifndef WOLFSSL_SP_SMALL +#else /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -26028,632 +58577,243 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r8, #0\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" -#else - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" -#endif + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" -#else - "strd r4, r5, [%[r], #0]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" -#else - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" -#else - "strd r4, r5, [%[r], #8]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" -#else - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" -#else - "strd r4, r5, [%[r], 
#16]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" -#else - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #28]\n\t" -#else - "strd r4, r5, [%[r], #24]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" -#else - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" -#else - "strd r4, r5, [%[r], #32]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" -#else - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" -#else - "strd r4, r5, [%[r], #40]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" -#else - "ldrd 
r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" -#else - "strd r4, r5, [%[r], #48]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" -#else - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #56]\n\t" - "str r5, [%[r], #60]\n\t" -#else - "strd r4, r5, [%[r], #56]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" -#else - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" -#else - "strd r4, r5, [%[r], #64]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" -#else - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, 
r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #72]\n\t" - "str r5, [%[r], #76]\n\t" -#else - "strd r4, r5, [%[r], #72]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" -#else - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" -#else - "strd r4, r5, [%[r], #80]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" -#else - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #88]\n\t" - "str r5, [%[r], #92]\n\t" -#else - "strd r4, r5, [%[r], #88]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" -#else - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" -#else - "strd r4, r5, [%[r], #96]\n\t" -#endif -#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" -#else - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #104]\n\t" - "str r5, [%[r], #108]\n\t" -#else - "strd r4, r5, [%[r], #104]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" -#else - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" -#else - "strd r4, r5, [%[r], #112]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" -#else - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #120]\n\t" - "str r5, [%[r], #124]\n\t" -#else - "strd r4, r5, [%[r], #120]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #128]\n\t" - "ldr r5, [%[a], #132]\n\t" - "ldr r6, [%[b], #128]\n\t" - "ldr r7, [%[b], #132]\n\t" -#else 
- "ldrd r4, r5, [%[a], #128]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #128]\n\t" - "str r5, [%[r], #132]\n\t" -#else - "strd r4, r5, [%[r], #128]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #136]\n\t" - "ldr r5, [%[a], #140]\n\t" - "ldr r6, [%[b], #136]\n\t" - "ldr r7, [%[b], #140]\n\t" -#else - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #136]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #136]\n\t" - "str r5, [%[r], #140]\n\t" -#else - "strd r4, r5, [%[r], #136]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #144]\n\t" - "ldr r5, [%[a], #148]\n\t" - "ldr r6, [%[b], #144]\n\t" - "ldr r7, [%[b], #148]\n\t" -#else - "ldrd r4, r5, [%[a], #144]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #144]\n\t" - "str r5, [%[r], #148]\n\t" -#else - "strd r4, r5, [%[r], #144]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #152]\n\t" - "ldr r5, [%[a], #156]\n\t" - "ldr r6, [%[b], #152]\n\t" - "ldr r7, [%[b], #156]\n\t" -#else - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #152]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, 
%[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #152]\n\t" - "str r5, [%[r], #156]\n\t" -#else - "strd r4, r5, [%[r], #152]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #160]\n\t" - "ldr r5, [%[a], #164]\n\t" - "ldr r6, [%[b], #160]\n\t" - "ldr r7, [%[b], #164]\n\t" -#else - "ldrd r4, r5, [%[a], #160]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #160]\n\t" - "str r5, [%[r], #164]\n\t" -#else - "strd r4, r5, [%[r], #160]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #168]\n\t" - "ldr r5, [%[a], #172]\n\t" - "ldr r6, [%[b], #168]\n\t" - "ldr r7, [%[b], #172]\n\t" -#else - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #168]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #168]\n\t" - "str r5, [%[r], #172]\n\t" -#else - "strd r4, r5, [%[r], #168]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #176]\n\t" - "ldr r5, [%[a], #180]\n\t" - "ldr r6, [%[b], #176]\n\t" - "ldr r7, [%[b], #180]\n\t" -#else - "ldrd r4, r5, [%[a], #176]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #176]\n\t" - "str r5, 
[%[r], #180]\n\t" -#else - "strd r4, r5, [%[r], #176]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #184]\n\t" - "ldr r5, [%[a], #188]\n\t" - "ldr r6, [%[b], #184]\n\t" - "ldr r7, [%[b], #188]\n\t" -#else - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #184]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #184]\n\t" - "str r5, [%[r], #188]\n\t" -#else - "strd r4, r5, [%[r], #184]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #192]\n\t" - "ldr r5, [%[a], #196]\n\t" - "ldr r6, [%[b], #192]\n\t" - "ldr r7, [%[b], #196]\n\t" -#else - "ldrd r4, r5, [%[a], #192]\n\t" - "ldrd r6, r7, [%[b], #192]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #192]\n\t" - "str r5, [%[r], #196]\n\t" -#else - "strd r4, r5, [%[r], #192]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #200]\n\t" - "ldr r5, [%[a], #204]\n\t" - "ldr r6, [%[b], #200]\n\t" - "ldr r7, [%[b], #204]\n\t" -#else - "ldrd r4, r5, [%[a], #200]\n\t" - "ldrd r6, r7, [%[b], #200]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #200]\n\t" - "str r5, [%[r], #204]\n\t" -#else - "strd r4, r5, [%[r], #200]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #208]\n\t" - "ldr r5, [%[a], 
#212]\n\t" - "ldr r6, [%[b], #208]\n\t" - "ldr r7, [%[b], #212]\n\t" -#else - "ldrd r4, r5, [%[a], #208]\n\t" - "ldrd r6, r7, [%[b], #208]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #208]\n\t" - "str r5, [%[r], #212]\n\t" -#else - "strd r4, r5, [%[r], #208]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #216]\n\t" - "ldr r5, [%[a], #220]\n\t" - "ldr r6, [%[b], #216]\n\t" - "ldr r7, [%[b], #220]\n\t" -#else - "ldrd r4, r5, [%[a], #216]\n\t" - "ldrd r6, r7, [%[b], #216]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #216]\n\t" - "str r5, [%[r], #220]\n\t" -#else - "strd r4, r5, [%[r], #216]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #224]\n\t" - "ldr r5, [%[a], #228]\n\t" - "ldr r6, [%[b], #224]\n\t" - "ldr r7, [%[b], #228]\n\t" -#else - "ldrd r4, r5, [%[a], #224]\n\t" - "ldrd r6, r7, [%[b], #224]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #224]\n\t" - "str r5, [%[r], #228]\n\t" -#else - "strd r4, r5, [%[r], #224]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #232]\n\t" - "ldr r5, [%[a], #236]\n\t" - "ldr r6, [%[b], #232]\n\t" - "ldr r7, [%[b], #236]\n\t" -#else - "ldrd r4, r5, [%[a], #232]\n\t" - "ldrd r6, r7, [%[b], #232]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + 
"ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #232]\n\t" - "str r5, [%[r], #236]\n\t" -#else - "strd r4, r5, [%[r], #232]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #240]\n\t" - "ldr r5, [%[a], #244]\n\t" - "ldr r6, [%[b], #240]\n\t" - "ldr r7, [%[b], #244]\n\t" -#else - "ldrd r4, r5, [%[a], #240]\n\t" - "ldrd r6, r7, [%[b], #240]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #240]\n\t" - "str r5, [%[r], #244]\n\t" -#else - "strd r4, r5, [%[r], #240]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #248]\n\t" - "ldr r5, [%[a], #252]\n\t" - "ldr r6, [%[b], #248]\n\t" - "ldr r7, [%[b], #252]\n\t" -#else - "ldrd r4, r5, [%[a], #248]\n\t" - "ldrd r6, r7, [%[b], #248]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #248]\n\t" - "str r5, [%[r], #252]\n\t" -#else - "strd r4, r5, [%[r], #248]\n\t" -#endif - "adc %[c], r8, r8\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* !WOLFSSL_SP_SMALL */ +#endif /* WOLFSSL_SP_SMALL */ /* RSA private key operation. 
* * in Array of bytes representing the number to exponentiate, base. @@ -26971,778 +59131,778 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "rsb r6, %[n], #31\n\t" - "ldr r3, [%[a], #508]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #504]\n\t" - "str r4, [%[r], #512]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #500]\n\t" - "str r3, [%[r], #508]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #508]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #504]\n\t" + "str r6, [%[r], #512]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #496]\n\t" - "str r2, [%[r], #504]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #492]\n\t" - "str r4, [%[r], #500]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #488]\n\t" - "str r3, [%[r], #496]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #500]\n\t" + "str r5, [%[r], #508]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #496]\n\t" + "str r4, [%[r], #504]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #492]\n\t" + "str r6, [%[r], #500]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #484]\n\t" - "str r2, [%[r], #492]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #480]\n\t" - "str r4, [%[r], 
#488]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #476]\n\t" - "str r3, [%[r], #484]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #488]\n\t" + "str r5, [%[r], #496]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #484]\n\t" + "str r4, [%[r], #492]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #480]\n\t" + "str r6, [%[r], #488]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #472]\n\t" - "str r2, [%[r], #480]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #468]\n\t" - "str r4, [%[r], #476]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #464]\n\t" - "str r3, [%[r], #472]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #476]\n\t" + "str r5, [%[r], #484]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #472]\n\t" + "str r4, [%[r], #480]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #468]\n\t" + "str r6, [%[r], #476]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #460]\n\t" - "str r2, [%[r], #468]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #456]\n\t" - "str r4, [%[r], #464]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #452]\n\t" - "str r3, [%[r], #460]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, 
r5, r3\n\t" + "ldr r6, [%[a], #464]\n\t" + "str r5, [%[r], #472]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #460]\n\t" + "str r4, [%[r], #468]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #456]\n\t" + "str r6, [%[r], #464]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #448]\n\t" - "str r2, [%[r], #456]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #444]\n\t" - "str r4, [%[r], #452]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #440]\n\t" - "str r3, [%[r], #448]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #452]\n\t" + "str r5, [%[r], #460]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #448]\n\t" + "str r4, [%[r], #456]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #444]\n\t" + "str r6, [%[r], #452]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #436]\n\t" - "str r2, [%[r], #444]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #432]\n\t" - "str r4, [%[r], #440]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #428]\n\t" - "str r3, [%[r], #436]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #440]\n\t" + "str r5, [%[r], #448]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #436]\n\t" + "str r4, [%[r], #444]\n\t" 
+ "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #432]\n\t" + "str r6, [%[r], #440]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #424]\n\t" - "str r2, [%[r], #432]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #420]\n\t" - "str r4, [%[r], #428]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #416]\n\t" - "str r3, [%[r], #424]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #428]\n\t" + "str r5, [%[r], #436]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #424]\n\t" + "str r4, [%[r], #432]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #420]\n\t" + "str r6, [%[r], #428]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #412]\n\t" - "str r2, [%[r], #420]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #408]\n\t" - "str r4, [%[r], #416]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #404]\n\t" - "str r3, [%[r], #412]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #416]\n\t" + "str r5, [%[r], #424]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #412]\n\t" + "str r4, [%[r], #420]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #408]\n\t" + "str r6, [%[r], #416]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - 
"orr r2, r2, r5\n\t" - "ldr r3, [%[a], #400]\n\t" - "str r2, [%[r], #408]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #396]\n\t" - "str r4, [%[r], #404]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #392]\n\t" - "str r3, [%[r], #400]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #404]\n\t" + "str r5, [%[r], #412]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #400]\n\t" + "str r4, [%[r], #408]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #396]\n\t" + "str r6, [%[r], #404]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #388]\n\t" - "str r2, [%[r], #396]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #384]\n\t" - "str r4, [%[r], #392]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #380]\n\t" - "str r3, [%[r], #388]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #392]\n\t" + "str r5, [%[r], #400]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #388]\n\t" + "str r4, [%[r], #396]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #384]\n\t" + "str r6, [%[r], #392]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #376]\n\t" - "str r2, [%[r], #384]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #372]\n\t" - "str r4, [%[r], 
#380]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #368]\n\t" - "str r3, [%[r], #376]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #380]\n\t" + "str r5, [%[r], #388]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #376]\n\t" + "str r4, [%[r], #384]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #372]\n\t" + "str r6, [%[r], #380]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #364]\n\t" - "str r2, [%[r], #372]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #360]\n\t" - "str r4, [%[r], #368]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #356]\n\t" - "str r3, [%[r], #364]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #368]\n\t" + "str r5, [%[r], #376]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #364]\n\t" + "str r4, [%[r], #372]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #360]\n\t" + "str r6, [%[r], #368]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #352]\n\t" - "str r2, [%[r], #360]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #348]\n\t" - "str r4, [%[r], #356]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #344]\n\t" - "str r3, [%[r], #352]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, 
r5, r3\n\t" + "ldr r6, [%[a], #356]\n\t" + "str r5, [%[r], #364]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #352]\n\t" + "str r4, [%[r], #360]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #348]\n\t" + "str r6, [%[r], #356]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #340]\n\t" - "str r2, [%[r], #348]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #336]\n\t" - "str r4, [%[r], #344]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #332]\n\t" - "str r3, [%[r], #340]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #344]\n\t" + "str r5, [%[r], #352]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #340]\n\t" + "str r4, [%[r], #348]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #336]\n\t" + "str r6, [%[r], #344]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #328]\n\t" - "str r2, [%[r], #336]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #324]\n\t" - "str r4, [%[r], #332]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #320]\n\t" - "str r3, [%[r], #328]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #332]\n\t" + "str r5, [%[r], #340]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #328]\n\t" + "str r4, [%[r], #336]\n\t" 
+ "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #324]\n\t" + "str r6, [%[r], #332]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #316]\n\t" - "str r2, [%[r], #324]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #312]\n\t" - "str r4, [%[r], #320]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #308]\n\t" - "str r3, [%[r], #316]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #320]\n\t" + "str r5, [%[r], #328]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #316]\n\t" + "str r4, [%[r], #324]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #312]\n\t" + "str r6, [%[r], #320]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #304]\n\t" - "str r2, [%[r], #312]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #300]\n\t" - "str r4, [%[r], #308]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #296]\n\t" - "str r3, [%[r], #304]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #308]\n\t" + "str r5, [%[r], #316]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #304]\n\t" + "str r4, [%[r], #312]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #300]\n\t" + "str r6, [%[r], #308]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - 
"orr r2, r2, r5\n\t" - "ldr r3, [%[a], #292]\n\t" - "str r2, [%[r], #300]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #288]\n\t" - "str r4, [%[r], #296]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #284]\n\t" - "str r3, [%[r], #292]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #296]\n\t" + "str r5, [%[r], #304]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #292]\n\t" + "str r4, [%[r], #300]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #288]\n\t" + "str r6, [%[r], #296]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #280]\n\t" - "str r2, [%[r], #288]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #276]\n\t" - "str r4, [%[r], #284]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #272]\n\t" - "str r3, [%[r], #280]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #284]\n\t" + "str r5, [%[r], #292]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #280]\n\t" + "str r4, [%[r], #288]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #276]\n\t" + "str r6, [%[r], #284]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #268]\n\t" - "str r2, [%[r], #276]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #264]\n\t" - "str r4, [%[r], 
#272]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #260]\n\t" - "str r3, [%[r], #268]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #272]\n\t" + "str r5, [%[r], #280]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #268]\n\t" + "str r4, [%[r], #276]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #264]\n\t" + "str r6, [%[r], #272]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #256]\n\t" - "str r2, [%[r], #264]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #252]\n\t" - "str r4, [%[r], #260]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #248]\n\t" - "str r3, [%[r], #256]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #260]\n\t" + "str r5, [%[r], #268]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #256]\n\t" + "str r4, [%[r], #264]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #252]\n\t" + "str r6, [%[r], #260]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #244]\n\t" - "str r2, [%[r], #252]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #240]\n\t" - "str r4, [%[r], #248]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #236]\n\t" - "str r3, [%[r], #244]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, 
r5, r3\n\t" + "ldr r6, [%[a], #248]\n\t" + "str r5, [%[r], #256]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #244]\n\t" + "str r4, [%[r], #252]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #240]\n\t" + "str r6, [%[r], #248]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #232]\n\t" - "str r2, [%[r], #240]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #228]\n\t" - "str r4, [%[r], #236]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #224]\n\t" - "str r3, [%[r], #232]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #236]\n\t" + "str r5, [%[r], #244]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #232]\n\t" + "str r4, [%[r], #240]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #228]\n\t" + "str r6, [%[r], #236]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #220]\n\t" - "str r2, [%[r], #228]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #216]\n\t" - "str r4, [%[r], #224]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #212]\n\t" - "str r3, [%[r], #220]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #224]\n\t" + "str r5, [%[r], #232]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #220]\n\t" + "str r4, [%[r], #228]\n\t" 
+ "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #216]\n\t" + "str r6, [%[r], #224]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #208]\n\t" - "str r2, [%[r], #216]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #204]\n\t" - "str r4, [%[r], #212]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #200]\n\t" - "str r3, [%[r], #208]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #212]\n\t" + "str r5, [%[r], #220]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #208]\n\t" + "str r4, [%[r], #216]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #204]\n\t" + "str r6, [%[r], #212]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #196]\n\t" - "str r2, [%[r], #204]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #192]\n\t" - "str r4, [%[r], #200]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #188]\n\t" - "str r3, [%[r], #196]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #200]\n\t" + "str r5, [%[r], #208]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #196]\n\t" + "str r4, [%[r], #204]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #192]\n\t" + "str r6, [%[r], #200]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - 
"orr r2, r2, r5\n\t" - "ldr r3, [%[a], #184]\n\t" - "str r2, [%[r], #192]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #180]\n\t" - "str r4, [%[r], #188]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #176]\n\t" - "str r3, [%[r], #184]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #188]\n\t" + "str r5, [%[r], #196]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #184]\n\t" + "str r4, [%[r], #192]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #180]\n\t" + "str r6, [%[r], #188]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #172]\n\t" - "str r2, [%[r], #180]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #168]\n\t" - "str r4, [%[r], #176]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #164]\n\t" - "str r3, [%[r], #172]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #176]\n\t" + "str r5, [%[r], #184]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #172]\n\t" + "str r4, [%[r], #180]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #168]\n\t" + "str r6, [%[r], #176]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #160]\n\t" - "str r2, [%[r], #168]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #156]\n\t" - "str r4, [%[r], 
#164]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #152]\n\t" - "str r3, [%[r], #160]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #164]\n\t" + "str r5, [%[r], #172]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #160]\n\t" + "str r4, [%[r], #168]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #156]\n\t" + "str r6, [%[r], #164]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #148]\n\t" - "str r2, [%[r], #156]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #144]\n\t" - "str r4, [%[r], #152]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #140]\n\t" - "str r3, [%[r], #148]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #152]\n\t" + "str r5, [%[r], #160]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #148]\n\t" + "str r4, [%[r], #156]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #144]\n\t" + "str r6, [%[r], #152]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #136]\n\t" - "str r2, [%[r], #144]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #132]\n\t" - "str r4, [%[r], #140]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #128]\n\t" - "str r3, [%[r], #136]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, 
r5, r3\n\t" + "ldr r6, [%[a], #140]\n\t" + "str r5, [%[r], #148]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #136]\n\t" + "str r4, [%[r], #144]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #132]\n\t" + "str r6, [%[r], #140]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #124]\n\t" - "str r2, [%[r], #132]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #120]\n\t" - "str r4, [%[r], #128]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #116]\n\t" - "str r3, [%[r], #124]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #128]\n\t" + "str r5, [%[r], #136]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #124]\n\t" + "str r4, [%[r], #132]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #120]\n\t" + "str r6, [%[r], #128]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #112]\n\t" - "str r2, [%[r], #120]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #108]\n\t" - "str r4, [%[r], #116]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #104]\n\t" - "str r3, [%[r], #112]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #116]\n\t" + "str r5, [%[r], #124]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #112]\n\t" + "str r4, [%[r], #120]\n\t" 
+ "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #108]\n\t" + "str r6, [%[r], #116]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #100]\n\t" - "str r2, [%[r], #108]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #96]\n\t" - "str r4, [%[r], #104]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #92]\n\t" - "str r3, [%[r], #100]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #104]\n\t" + "str r5, [%[r], #112]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #100]\n\t" + "str r4, [%[r], #108]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #96]\n\t" + "str r6, [%[r], #104]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #88]\n\t" - "str r2, [%[r], #96]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #84]\n\t" - "str r4, [%[r], #92]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #80]\n\t" - "str r3, [%[r], #88]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #92]\n\t" + "str r5, [%[r], #100]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #88]\n\t" + "str r4, [%[r], #96]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #84]\n\t" + "str r6, [%[r], #92]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, 
r5\n\t" - "ldr r3, [%[a], #76]\n\t" - "str r2, [%[r], #84]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #72]\n\t" - "str r4, [%[r], #80]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #68]\n\t" - "str r3, [%[r], #76]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #80]\n\t" + "str r5, [%[r], #88]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #76]\n\t" + "str r4, [%[r], #84]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #72]\n\t" + "str r6, [%[r], #80]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #64]\n\t" - "str r2, [%[r], #72]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #60]\n\t" - "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #56]\n\t" - "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #68]\n\t" + "str r5, [%[r], #76]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #64]\n\t" + "str r4, [%[r], #72]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r6, [%[r], #68]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" - "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, 
r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #44]\n\t" - "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #56]\n\t" + "str r5, [%[r], #64]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r6, [%[r], #56]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" - "str r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #32]\n\t" - "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #44]\n\t" + "str r5, [%[r], #52]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r6, [%[r], #44]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" - "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #20]\n\t" - "str r3, [%[r], #28]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #32]\n\t" + "str r5, [%[r], #40]\n\t" + 
"lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r6, [%[r], #32]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" - "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #8]\n\t" - "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #20]\n\t" + "str r5, [%[r], #28]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r6, [%[r], #20]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" - "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "str r2, [%[r], #0]\n\t" - "str r3, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #8]\n\t" + "str r5, [%[r], #16]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a]]\n\t" + "str r6, 
[%[r], #8]\n\t" + "lsr r3, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12" ); } @@ -28038,54 +60198,81 @@ static const sp_digit p256_b[8] = { static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" + "sub sp, sp, #0x40\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" + "\n" + "L_sp_256_mul_8_outer_%=: \n\t" "subs r3, r5, #28\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_256_mul_8_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" "cmp r3, #32\n\t" - "beq 3f\n\t" + "beq L_sp_256_mul_8_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble 
L_sp_256_mul_8_inner_%=\n\t" + "\n" + "L_sp_256_mul_8_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" "cmp r5, #56\n\t" - "ble 1b\n\t" + "ble L_sp_256_mul_8_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_256_mul_8_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_256_mul_8_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -28101,441 +60288,2440 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "sub sp, sp, #32\n\t" "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr 
r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" 
+ "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, 
r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" 
"adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * 
B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + 
/* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr 
r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + 
"adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# A[7] * B[1]\n\t" + /* A[7] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[6] * B[2]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #32]\n\t" - "# A[2] * B[7]\n\t" + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[3] * B[6]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else 
"umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, 
r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #36]\n\t" - "# A[7] * B[3]\n\t" + /* A[7] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[6] * B[4]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * 
B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #40]\n\t" - "# A[4] * B[7]\n\t" + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[5] * B[6]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, 
#16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #44]\n\t" - "# A[7] * B[5]\n\t" + /* A[7] * B[5] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[6] * B[6]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" 
+ "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #48]\n\t" - "# A[6] * B[7]\n\t" + /* A[6] * B[7] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[7] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #52]\n\t" - "# A[7] * B[7]\n\t" + /* A[7] * B[7] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, 
r3, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" +#endif "str r5, [%[r], #56]\n\t" "str r3, [%[r], #60]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #32\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -28550,77 +62736,132 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) { /* Square the eight 32-bit words at a into sixteen 32-bit words at r. Result words are produced one at a time, least significant first: for each result byte offset r5 the inner loop sums the products of the words at byte offsets r3 and r4 of a, where r3 + r4 == r5 and r3 <= r4, adding unequal pairs twice and the square term once. Words are staged in a 64-byte stack buffer and copied to r at the end (presumably so that r may alias a - confirm against callers). */ __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" + "sub sp, sp, #0x40\n\t" /* reserve 64 bytes of stack for the 16 result words */ "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" /* r12 = constant 0, r6:r7:r8 = running column sum (low to high word), r5 = byte offset of the result word being computed */ - "\n1:\n\t" + "\n" + "L_sp_256_sqr_8_outer_%=: \n\t" "subs r3, r5, #28\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" /* r3 = max(r5 - 28, 0) and r4 = r5 - r3: byte offsets of the first operand pair for this column */ - "\n2:\n\t" + "\n" + "L_sp_256_sqr_8_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_256_sqr_8_op_sqr_%=\n\t" /* both offsets equal: take the square-term path, which adds the product once */ + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) /* this path avoids umull: the 32x32 product is assembled from four 16x16 mul partial products, each added into r6:r7:r8 twice */ + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a],
r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" /* r10:r9 = 64-bit product; the two add chains below add it to r6:r7:r8 twice */ + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_256_sqr_8_op_done_%=\n\t" + "\n" + "L_sp_256_sqr_8_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" /* r9 = low half squared (low result word), r10 = high half squared (high result word) */ + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" /* the low*high cross term counts twice and is weighted by 2^16, hence the combined shift of 17 (15 bits spill into the high word) */ + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_256_sqr_8_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" "cmp r3, #32\n\t" - "beq 3f\n\t" + "beq L_sp_256_sqr_8_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_256_sqr_8_inner_done_%=\n\t" /* stop once r3 has passed r4, so each pair of words is visited only once per column */ "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_256_sqr_8_inner_%=\n\t" + "\n" + "L_sp_256_sqr_8_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" /* low word stored to the stack buffer above; shift the column sum down one word for the next column */ "add r5, r5, #4\n\t" "cmp r5, #56\n\t" - "ble 1b\n\t" + "ble L_sp_256_sqr_8_outer_%=\n\t" "str r6,
[sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_256_sqr_8_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" /* copy four result words from the stack buffer to r per pass; the ldm write-back releases the stack space as it goes */ "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_256_sqr_8_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -28634,108 +62875,514 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "sub sp, sp, #32\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4,
r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [sp, #12]\n\t" - "# 
A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" 
"adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc 
r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -28743,66 +63390,294 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc 
r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, 
r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, 
r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -28810,59 +63685,256 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[6]\n\t" + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + 
"adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #32]\n\t" - "# A[2] * A[7]\n\t" + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[6]\n\t" + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" 
+ "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -28870,97 +63942,424 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #36]\n\t" - "# A[3] * A[7]\n\t" + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + /* r2 already holds the carry of the first addition - it must not be cleared again before the second carry is added */ + "adc r2, r2, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, 
r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #40]\n\t" - "# A[4] * A[7]\n\t" + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + /* r3 already holds the carry of the first addition - it must not be cleared again before the second carry is added */ + "adc r3, r3, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #44]\n\t" - "# A[5] * A[7]\n\t" + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], 
#20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + /* r4 already holds the carry of the first addition - it must not be cleared again before the second carry is added */ + "adc r4, r4, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" 
+#endif "str r2, [%[r], #48]\n\t" - "# A[6] * A[7]\n\t" + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + /* r2 already holds the carry of the first addition - it must not be cleared again before the second carry is added */ + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #52]\n\t" - "# A[7] * A[7]\n\t" + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, 
r8\n\t" + "adc r2, r2, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adc r2, r2, r9\n\t" +#endif "str r4, [%[r], #56]\n\t" "str r2, [%[r], #60]\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" "stm %[r]!, {r2, r3, r4, r8}\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #32\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); } @@ -28972,15 +64371,14 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #32\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #32\n\t" + "\n" + "L_sp_256_add_8_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -28989,15 +64387,15 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_256_add_8_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -29007,34 +64405,30 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" /* r12 = 0, used at the end to turn the carry flag into 0 or 1 */ + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" /* lowest word: plain add starts the carry chain */ "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" /* second group of four words continues the same carry chain */ + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" /* r = 0 + 0 + carry flag: the carry out of the last word */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; /* r was overwritten by the asm with the carry value, it is no longer a pointer */ } #endif /* WOLFSSL_SP_SMALL */ @@ -29045,31 +64439,30 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #32\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" /* r12 = borrow saved between loop iterations (0 or -1), starts at 0 */ + "add lr, %[a], #32\n\t" /* lr = a + 32 bytes: value of a at which the loop stops */ + "\n" + "L_sp_256_sub_8_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" /* computes 0 - r12 with flags set: re-creates the carry flag from the saved borrow */ + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" /* r12 = r3 - r3 - borrow: 0 or -1, saved before cmp changes the flags */ + "cmp %[a], lr\n\t" + "bne L_sp_256_sub_8_word_%=\n\t" + "mov %[r], r12\n\t" /* the final borrow is handed back through the r operand */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" ); - - return c; + return (uint32_t)(size_t)r; /* r was overwritten by the asm with the borrow value, it is no longer a pointer */ } #else @@ -29079,33 +64472,29 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -29117,231 +64506,204 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, */ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( "sub sp, sp, #24\n\t" - "ldr r2, [%[a], #0]\n\t" - "ldr r3, [%[a], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #20]\n\t" - "ldr r8, [%[a], #24]\n\t" - "ldr r9, [%[a], #28]\n\t" - "# Clear overflow and underflow\n\t" - "mov r14, #0\n\t" - "mov r12, #0\n\t" + "ldm %[a], {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + /* Clear overflow and underflow */ + "mov lr, #0\n\t" + "mov r10, #0\n\t" "# t[0] = 1 1 
0 -1 -1 -1 -1 0\n\t" - "adds r10, r2, r3\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r5\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r6\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r7\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r8\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[0]\n\t" - "str r10, [sp, #0]\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" + "adds r12, r2, r3\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r5\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r6\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r7\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r8\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[0] */ + "str r12, [sp]\n\t" + "neg r10, r10\n\t" + "mov r12, #0\n\t" "# t[1] = 0 1 1 0 -1 -1 -1 -1\n\t" - "adds r14, r14, r3\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r4\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r6\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r7\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r8\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r9\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[1]\n\t" - "str r14, [sp, #4]\n\t" - "neg r12, r12\n\t" - "mov r14, #0\n\t" + "adds lr, lr, r3\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r4\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r6\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r7\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r8\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r9\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[1] */ + "str lr, [sp, #4]\n\t" + "neg r10, r10\n\t" + "mov lr, #0\n\t" "# t[2] = 0 0 1 1 0 -1 -1 -1\n\t" - "adds r10, r10, r4\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r5\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r12\n\t" + "adds r12, r12, r4\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r5\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs r12, r12, r7\n\t" + "sbc r10, r10, #0\n\t" + "subs 
r12, r12, r8\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r9\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[2] */ + "str r12, [sp, #8]\n\t" + "neg r10, r10\n\t" "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r7\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r8\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r9\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[2]\n\t" - "str r10, [sp, #8]\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" "# t[3] = -1 -1 0 2 2 1 0 -1\n\t" - "adds r14, r14, r5\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r5\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r6\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r6\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r7\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r2\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r3\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r9\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[3]\n\t" - "str r14, [sp, #12]\n\t" - "neg r12, r12\n\t" - "mov r14, #0\n\t" + "adds lr, lr, r5\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r5\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r6\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r6\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r7\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r2\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r3\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r9\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[3] */ + "str lr, [sp, #12]\n\t" + "neg r10, r10\n\t" + "mov lr, #0\n\t" "# t[4] = 0 -1 -1 0 2 2 1 0\n\t" - "adds r10, r10, r6\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r6\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r7\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r7\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r12\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r6\n\t" + "adc lr, lr, #0\n\t" + "adds 
r12, r12, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs r12, r12, r3\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r4\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[4] */ + "str r12, [sp, #16]\n\t" + "neg r10, r10\n\t" "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r3\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r4\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[4]\n\t" - "str r10, [sp, #16]\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" "# t[5] = 0 0 -1 -1 0 2 2 1\n\t" - "adds r14, r14, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r8\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r8\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r4\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r5\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[5]\n\t" - "str r14, [sp, #20]\n\t" - "neg r12, r12\n\t" - "mov r14, #0\n\t" + "adds lr, lr, r7\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r7\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r8\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r8\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r4\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r5\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[5] */ + "str lr, [sp, #20]\n\t" + "neg r10, r10\n\t" + "mov lr, #0\n\t" "# t[6] = -1 -1 0 0 0 1 3 2\n\t" - "adds r10, r10, r7\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r8\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r9\n\t" - "adc r14, r14, #0\n\t" - "adds r10, r10, r9\n\t" - "adc r14, r14, #0\n\t" - "subs r10, r10, r12\n\t" + "adds 
r12, r12, r7\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r8\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r9\n\t" + "adc lr, lr, #0\n\t" + "adds r12, r12, r9\n\t" + "adc lr, lr, #0\n\t" + "subs r12, r12, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs r12, r12, r2\n\t" + "sbc r10, r10, #0\n\t" + "subs r12, r12, r3\n\t" + "sbc r10, r10, #0\n\t" + /* Store t[6] */ + "mov r8, r12\n\t" + "neg r10, r10\n\t" "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r2\n\t" - "sbc r12, r12, #0\n\t" - "subs r10, r10, r3\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[6]\n\t" - "mov r8, r10\n\t" - "neg r12, r12\n\t" - "mov r10, #0\n\t" "# t[7] = 1 0 -1 -1 -1 -1 0 3\n\t" - "adds r14, r14, r2\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "adds r14, r14, r9\n\t" - "adc r10, r10, #0\n\t" - "subs r14, r14, r12\n\t" - "mov r12, #0\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r4\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r5\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r6\n\t" - "sbc r12, r12, #0\n\t" - "subs r14, r14, r7\n\t" - "sbc r12, r12, #0\n\t" - "# Store t[7]\n\t" - "# Load intermediate\n\t" - "ldr r2, [sp, #0]\n\t" - "ldr r3, [sp, #4]\n\t" - "ldr r4, [sp, #8]\n\t" - "ldr r5, [sp, #12]\n\t" - "ldr r6, [sp, #16]\n\t" - "ldr r7, [sp, #20]\n\t" - "neg r12, r12\n\t" - "# Add overflow\n\t" - "# Subtract underflow - add neg underflow\n\t" - "adds r2, r2, r10\n\t" + "adds lr, lr, r2\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "adds lr, lr, r9\n\t" + "adc r12, r12, #0\n\t" + "subs lr, lr, r10\n\t" + "sbc r10, r10, r10\n\t" + "subs lr, lr, r4\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r5\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r6\n\t" + "sbc r10, r10, #0\n\t" + "subs lr, lr, r7\n\t" + "sbc r10, r10, 
#0\n\t" + /* Store t[7] */ + /* Load intermediate */ + "ldm sp, {r2, r3, r4, r5, r6, r7}\n\t" + "neg r10, r10\n\t" + /* Add overflow */ + /* Subtract underflow - add neg underflow */ + "adds r2, r2, r12\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adds r5, r5, r12\n\t" + "adcs r5, r5, r10\n\t" "adcs r6, r6, #0\n\t" "adcs r7, r7, #0\n\t" - "adcs r8, r8, r12\n\t" - "adc r14, r14, r10\n\t" - "# Subtract overflow\n\t" - "# Add underflow - subtract neg underflow\n\t" - "subs r2, r2, r12\n\t" + "adcs r8, r8, r10\n\t" + "adc lr, lr, r12\n\t" + /* Subtract overflow */ + /* Add underflow - subtract neg underflow */ + "subs r2, r2, r10\n\t" "sbcs r3, r3, #0\n\t" "sbcs r4, r4, #0\n\t" - "subs r5, r5, r10\n\t" + "sbcs r5, r5, r12\n\t" "sbcs r6, r6, #0\n\t" "sbcs r7, r7, #0\n\t" - "sbcs r8, r8, r10\n\t" - "sbc r14, r14, r12\n\t" - "# Store result\n\t" - "str r2, [%[r], #0]\n\t" - "str r3, [%[r], #4]\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" - "str r6, [%[r], #16]\n\t" - "str r7, [%[r], #20]\n\t" - "str r8, [%[r], #24]\n\t" - "str r14, [%[r], #28]\n\t" + "sbcs r8, r8, r12\n\t" + "sbc lr, lr, r10\n\t" + /* Store result */ + "stm %[r], {r2, r3, r4, r5, r6, r7, r8, lr}\n\t" + "mov %[r], #0\n\t" "add sp, sp, #24\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return MP_OKAY; + (void)m; + return (uint32_t)(size_t)r; } /* Convert an mp_int to an array of sp_digit. @@ -29545,627 +64907,2579 @@ static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm) * m Modulus (prime). * mp Montgomery multiplier. 
*/ -SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m, sp_digit mp) +static SP_NOINLINE void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m, sp_digit mp) { - (void)mp; - (void)m; - __asm__ __volatile__ ( - "sub sp, sp, #68\n\t" + "sub sp, sp, #0x44\n\t" "mov r5, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r8, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r3, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r9, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "mul r4, r3, r4\n\t" + "add r9, r9, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, r4\n\t" +#else "umull r8, r9, r6, r7\n\t" - "str r8, [sp, #0]\n\t" - "# A[0] * B[1]\n\t" +#endif + "str r8, [sp]\n\t" + /* A[0] * B[1] */ "ldr r7, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "mov r10, #0\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adc r10, r4, #0\n\t" - "# 
A[1] * B[0]\n\t" +#endif + /* A[1] * B[0] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #0]\n\t" + "ldr r7, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" + "adc lr, r5, #0\n\t" +#endif "str r9, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r6, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add lr, lr, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adc r14, r4, r14\n\t" - "# A[1] * B[1]\n\t" + "adds r10, r10, r3\n\t" + "adc lr, r4, lr\n\t" +#endif + /* A[1] * B[1] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, 
r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[0] * B[2]\n\t" - "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #8]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r6, [%[a]]\n\t" + "ldr r7, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif "str r10, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + 
/* A[0] * B[3] */ "ldr r7, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, #0\n\t" - "# A[1] * B[2]\n\t" +#endif + /* A[1] * B[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[2] * B[1]\n\t" +#endif + /* A[2] * B[1] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[3] * B[0]\n\t" +#endif + /* A[3] * B[0] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" + "ldr r7, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" +#endif + "str lr, [sp, #12]\n\t" + /* A[4] * B[0] */ "ldr r6, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, 
r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, #0\n\t" - "# A[3] * B[1]\n\t" +#endif + /* A[3] * B[1] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[2] * B[2]\n\t" +#endif + /* A[2] * B[2] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, 
#16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[1] * B[3]\n\t" +#endif + /* A[1] * B[3] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r6, [%[a], #0]\n\t" +#endif + /* A[0] * B[4] */ + "ldr r6, [%[a]]\n\t" "ldr r7, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, 
r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" +#endif "str r8, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r7, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[1] * B[4]\n\t" + "adc lr, r5, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, 
lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[2] * B[3]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[2] * B[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[3] * B[2]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[3] * B[2] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, 
r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[4] * B[1]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[4] * B[1] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[5] * B[0]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[5] * B[0] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #0]\n\t" + "ldr r7, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + 
"lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "adc lr, r5, lr\n\t" +#endif "str r9, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r6, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[5] * B[1]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #4]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + 
"mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[4] * B[2]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[4] * B[2] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #8]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[3] * B[3]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[3] * B[3] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + 
"lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[2] * B[4]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[2] * B[4] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[1] * B[5]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[1] * B[5] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[0] * B[6]\n\t" - "ldr r6, [%[a], #0]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[0] * B[6] */ + "ldr r6, [%[a]]\n\t" "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif "str r10, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r7, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, 
r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, #0\n\t" - "# A[1] * B[6]\n\t" +#endif + /* A[1] * B[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[2] * B[5]\n\t" +#endif + /* A[2] * B[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + 
"mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[3] * B[4]\n\t" +#endif + /* A[3] * B[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[4] * B[3]\n\t" +#endif + /* A[4] * B[3] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, 
r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[5] * B[2]\n\t" +#endif + /* A[5] * B[2] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[6] * B[1]\n\t" +#endif + /* A[6] * B[1] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, 
r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[7] * B[0]\n\t" +#endif + /* A[7] * B[0] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #0]\n\t" + "ldr r7, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #28]\n\t" - "# A[7] * B[1]\n\t" +#endif + "str lr, [sp, #28]\n\t" + /* A[7] * B[1] */ "ldr r7, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" 
+ "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, #0\n\t" - "# A[6] * B[2]\n\t" +#endif + /* A[6] * B[2] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[5] * B[3]\n\t" +#endif + /* A[5] * B[3] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" 
+ "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[4] * B[4]\n\t" +#endif + /* A[4] * B[4] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[3] * B[5]\n\t" +#endif + /* A[3] * B[5] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" 
+ "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[2] * B[6]\n\t" +#endif + /* A[2] * B[6] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[1] * B[7]\n\t" +#endif + /* A[1] * B[7] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" 
+ "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" +#endif "str r8, [sp, #32]\n\t" - "# A[2] * B[7]\n\t" + /* A[2] * B[7] */ "ldr r6, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[3] * B[6]\n\t" + "adc lr, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + 
"lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[4] * B[5]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[4] * B[5] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[5] * B[4]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[5] * B[4] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, 
#16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[6] * B[3]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[6] * B[3] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[7] * B[2]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[7] * B[2] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds 
r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "adc lr, r5, lr\n\t" +#endif "str r9, [sp, #36]\n\t" - "# A[7] * B[3]\n\t" + /* A[7] * B[3] */ "ldr r7, [%[b], #12]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[6] * B[4]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[5] * B[5]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, 
#16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[5] * B[5] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[4] * B[6]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[4] * B[6] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[b], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[3] * B[7]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, 
r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[3] * B[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[b], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "str r10, [sp, #40]\n\t" - "# A[4] * B[7]\n\t" - "ldr r6, [%[a], #16]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + "str r10, [sp, #40]\n\t" + /* A[4] * B[7] */ + "ldr r6, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, 
r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, #0\n\t" - "# A[5] * B[6]\n\t" +#endif + /* A[5] * B[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[6] * B[5]\n\t" +#endif + /* A[6] * B[5] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul 
r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[7] * B[4]\n\t" +#endif + /* A[7] * B[4] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #44]\n\t" - "# A[7] * B[5]\n\t" +#endif + "str lr, [sp, #44]\n\t" + /* A[7] * B[5] */ "ldr r7, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, 
#16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, #0\n\t" - "# A[6] * B[6]\n\t" +#endif + /* A[6] * B[6] */ "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[5] * B[7]\n\t" +#endif + /* A[5] * B[7] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, 
r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[6] * B[7]\n\t" +#endif + /* A[6] * B[7] */ "ldr r6, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[7] * B[6]\n\t" + "adc lr, r5, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" 
+ "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[7] * B[7]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[7] * B[7] */ "ldr r7, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add lr, lr, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adc r14, r4, r14\n\t" + "adds r10, r10, r3\n\t" + "adc lr, r4, lr\n\t" +#endif "str r8, [sp, #48]\n\t" "str r9, [sp, #52]\n\t" "str r10, [sp, #56]\n\t" - "str r14, [sp, #60]\n\t" - "# Start Reduction\n\t" - "ldr r4, [sp, #0]\n\t" + "str lr, [sp, #60]\n\t" + /* Start Reduction */ + "ldr r4, [sp]\n\t" "ldr r5, [sp, #4]\n\t" "ldr r6, [sp, #8]\n\t" "ldr r7, [sp, #12]\n\t" "ldr r8, [sp, #16]\n\t" "ldr r9, [sp, #20]\n\t" "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" - "# mu = a[0..7] + a[0..4] << 96 + (a[0..1] * 2) << 192\n\t" - "# - a[0] << 224\n\t" - "# + (a[0..1] * 2) << (6 * 32)\n\t" + "ldr lr, [sp, #28]\n\t" + /* mu = a[0..7] + a[0..4] << 96 + (a[0..1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0..1] * 2) << (6 * 32) */ "adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" + "adc lr, lr, r5\n\t" "adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" - "# - a[0] << (7 * 32)\n\t" - "sub r14, r14, r4\n\t" - "# + a[0..4] << (3 * 32)\n\t" + "adc lr, lr, r5\n\t" + /* - a[0] << (7 * 32) */ + "sub lr, lr, r4\n\t" + /* + a[0..4] << (3 * 32) */ "mov %[a], r7\n\t" - "mov %[b], r8\n\t" + "mov r12, r8\n\t" "adds r7, r7, 
r4\n\t" "adcs r8, r8, r5\n\t" "adcs r9, r9, r6\n\t" "adcs r10, r10, %[a]\n\t" - "adc r14, r14, %[b]\n\t" + "adc lr, lr, r12\n\t" "str r7, [sp, #12]\n\t" "str r8, [sp, #16]\n\t" "str r9, [sp, #20]\n\t" - "# a += mu * m\n\t" - "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t" - "mov %[a], #0\n\t" - "# a[6] += t[0] + t[3]\n\t" - "ldr r3, [sp, #24]\n\t" - "adds r3, r3, r4\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r7\n\t" - "adc %[b], %[b], #0\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[6] += t[0] + t[3] */ + /* a[7] += t[1] + t[4] */ + "ldr %[a], [sp, #24]\n\t" + "ldr %[b], [sp, #28]\n\t" + "adds %[a], %[a], r4\n\t" + "adcs %[b], %[b], r5\n\t" + "mov r12, #0\n\t" + "adc r12, r12, #0\n\t" + "adds %[a], %[a], r7\n\t" + "adcs %[b], %[b], r8\n\t" + "adc r12, r12, #0\n\t" "str r10, [sp, #24]\n\t" - "# a[7] += t[1] + t[4]\n\t" - "ldr r3, [sp, #28]\n\t" - "adds r3, r3, %[b]\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r5\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r8\n\t" - "adc %[b], %[b], #0\n\t" - "str r14, [sp, #28]\n\t" - "str r3, [sp, #64]\n\t" - "# a[8] += t[0] + t[2] + t[5]\n\t" - "ldr r3, [sp, #32]\n\t" - "adds r3, r3, %[b]\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r4\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r6\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r9\n\t" - "adc %[b], %[b], #0\n\t" - "str r3, [sp, #32]\n\t" - "# a[9] += t[1] + t[3] + t[6]\n\t" - "# a[10] += t[2] + t[4] + t[7]\n\t" - "ldr r3, [sp, #36]\n\t" - "ldr r4, [sp, #40]\n\t" - "adds r3, r3, %[b]\n\t" - "adcs r4, r4, #0\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r5\n\t" - "adcs r4, r4, r6\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r10\n\t" - "adcs r4, r4, r14\n\t" - "adc %[b], %[b], #0\n\t" - "str r3, [sp, #36]\n\t" - "str r4, [sp, #40]\n\t" - "# a[11] += t[3] + t[5]\n\t" - "# a[12] += t[4] + t[6]\n\t" - "# a[13] 
+= t[5] + t[7]\n\t" - "# a[14] += t[6]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "adds r3, r3, %[b]\n\t" + "str lr, [sp, #28]\n\t" + "str %[b], [sp, #64]\n\t" + /* a[8] += t[0] + t[2] + t[5] */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "ldr %[a], [sp, #32]\n\t" + "ldr %[b], [sp, #36]\n\t" + "ldr r3, [sp, #40]\n\t" + "adds %[a], %[a], r12\n\t" + "adcs %[b], %[b], #0\n\t" + "adcs r3, r3, #0\n\t" + "mov r12, #0\n\t" + "adc r12, r12, #0\n\t" + "adds %[a], %[a], r4\n\t" + "adcs %[b], %[b], r5\n\t" + "adcs r3, r3, r6\n\t" + "adc r12, r12, #0\n\t" + "adds %[a], %[a], r6\n\t" + "adcs %[b], %[b], r7\n\t" + "adcs r3, r3, r8\n\t" + "adc r12, r12, #0\n\t" + "adds %[a], %[a], r9\n\t" + "adcs %[b], %[b], r10\n\t" + "adcs r3, r3, lr\n\t" + "adc r12, r12, #0\n\t" + "str %[a], [sp, #32]\n\t" + "str %[b], [sp, #36]\n\t" + "str r3, [sp, #40]\n\t" + /* a[11] += t[3] + t[5] */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldr %[a], [sp, #44]\n\t" + "ldr %[b], [sp, #48]\n\t" + "ldr r3, [sp, #52]\n\t" + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" + "adds %[a], %[a], r12\n\t" + "adcs %[b], %[b], #0\n\t" + "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adc %[b], %[b], #0\n\t" - "adds r3, r3, r9\n\t" + "mov r12, #0\n\t" + "adc r12, r12, #0\n\t" + "adds %[a], %[a], r7\n\t" + "adcs %[b], %[b], r8\n\t" + "adcs r3, r3, r9\n\t" "adcs r4, r4, r10\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, #0\n\t" - "adc %[b], %[b], #0\n\t" - "str r3, [sp, #44]\n\t" - "str r4, [sp, #48]\n\t" - "str r5, [sp, #52]\n\t" - "str r6, [sp, #56]\n\t" - "# a[15] += t[7]\n\t" - "ldr r3, [sp, #60]\n\t" - "adds r3, r3, %[b]\n\t" - "adc %[b], %[a], #0\n\t" - "adds r3, r3, r14\n\t" - "adc %[b], 
%[b], #0\n\t" - "str r3, [sp, #60]\n\t" - "ldr r3, [sp, #64]\n\t" - "ldr r4, [sp, #32]\n\t" - "ldr r5, [sp, #36]\n\t" - "ldr r6, [sp, #40]\n\t" - "ldr r8, [sp, #0]\n\t" + "adcs r5, r5, lr\n\t" + "adc r12, r12, #0\n\t" + "adds %[a], %[a], r9\n\t" + "adcs %[b], %[b], r10\n\t" + "adcs r3, r3, lr\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adc r12, r12, #0\n\t" + "str %[a], [sp, #44]\n\t" + "str %[b], [sp, #48]\n\t" + "str r3, [sp, #52]\n\t" + "str r4, [sp, #56]\n\t" + "str r5, [sp, #60]\n\t" + "ldr %[a], [sp, #64]\n\t" + "ldr %[b], [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "ldr r8, [sp]\n\t" "ldr r9, [sp, #4]\n\t" "ldr r10, [sp, #8]\n\t" - "ldr r14, [sp, #12]\n\t" - "subs r3, r3, r8\n\t" - "sbcs r4, r4, r9\n\t" - "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "str r4, [sp, #32]\n\t" - "str r5, [sp, #36]\n\t" - "str r6, [sp, #40]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "ldr r7, [sp, #60]\n\t" + "ldr lr, [sp, #12]\n\t" + "subs %[a], %[a], r8\n\t" + "sbcs %[b], %[b], r9\n\t" + "sbcs r3, r3, r10\n\t" + "sbcs r4, r4, lr\n\t" + "str %[b], [sp, #32]\n\t" + "str r3, [sp, #36]\n\t" + "str r4, [sp, #40]\n\t" + "ldr %[a], [sp, #44]\n\t" + "ldr %[b], [sp, #48]\n\t" + "ldr r3, [sp, #52]\n\t" + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" "ldr r8, [sp, #16]\n\t" "ldr r9, [sp, #20]\n\t" "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" - "sbcs r3, r3, r8\n\t" - "sbcs r4, r4, r9\n\t" - "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "sbc r7, r7, #0\n\t" - "# mask m and sub from result if overflow\n\t" - "sub %[b], %[a], %[b]\n\t" - "and %[a], %[b], #1\n\t" - "ldr r8, [sp, #32]\n\t" - "ldr r9, [sp, #36]\n\t" - "ldr r10, [sp, #40]\n\t" - "subs r8, r8, %[b]\n\t" - "sbcs r9, r9, %[b]\n\t" - "sbcs r10, r10, %[b]\n\t" + "ldr lr, [sp, #28]\n\t" + "sbcs %[a], %[a], r8\n\t" + "sbcs %[b], %[b], r9\n\t" + "sbcs r3, r3, r10\n\t" + "sbcs r4, r4, lr\n\t" + "sbc r5, r5, #0\n\t" 
+ /* mask m and sub from result if overflow */ + "rsb r12, r12, #0\n\t" + "and lr, r12, #1\n\t" + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #36]\n\t" + "ldr r8, [sp, #40]\n\t" + "subs r6, r6, r12\n\t" + "sbcs r7, r7, r12\n\t" + "sbcs r8, r8, r12\n\t" + "sbcs %[a], %[a], #0\n\t" + "sbcs %[b], %[b], #0\n\t" "sbcs r3, r3, #0\n\t" - "sbcs r4, r4, #0\n\t" - "sbcs r5, r5, #0\n\t" - "sbcs r6, r6, %[a]\n\t" - "sbc r7, r7, %[b]\n\t" - "str r8, [%[r], #0]\n\t" - "str r9, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "add sp, sp, #68\n\t" - : [a] "+r" (a), [b] "+r" (b) - : [r] "r" (r) - : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7" + "sbcs r4, r4, lr\n\t" + "sbc r5, r5, r12\n\t" + "stm %[r]!, {r6, r7, r8}\n\t" + "stm %[r]!, {%[a], %[b], r3, r4, r5}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r12" ); + (void)mp; + (void)m; + (void)mp; } /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m) @@ -30175,213 +67489,1052 @@ SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const * m Modulus (prime). * mp Montgomery multiplier. 
*/ -SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m, sp_digit mp) { - (void)mp; - (void)m; - __asm__ __volatile__ ( - "sub sp, sp, #68\n\t" + "sub sp, sp, #0x44\n\t" "mov r5, #0\n\t" - "# A[0] * A[1]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * A[1] */ + "ldr r6, [%[a]]\n\t" "ldr r7, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r9, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r3, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r10, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" +#else "umull r9, r10, r6, r7\n\t" +#endif "str r9, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r7, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "mov lr, #0\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add lr, lr, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adc lr, lr, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adc r14, r4, #0\n\t" + "adds r10, r10, r3\n\t" + "adc lr, r4, #0\n\t" +#endif "str r10, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + 
/* A[0] * A[3] */ "ldr r7, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "mov r8, #0\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adc r8, r8, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r8, r8, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adc r8, r8, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adc r8, r4, #0\n\t" - "# A[1] * A[2]\n\t" +#endif + /* A[1] * A[2] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, #0\n\t" - "str r14, [sp, #12]\n\t" - "# A[1] * A[3]\n\t" +#endif + "str lr, [sp, #12]\n\t" + /* A[1] * A[3] */ "ldr r7, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr 
r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r9, r9, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adc r9, r9, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adc r9, r4, r9\n\t" - "# A[0] * A[4]\n\t" - "ldr r6, [%[a], #0]\n\t" +#endif + /* A[0] * A[4] */ + "ldr r6, [%[a]]\n\t" "ldr r7, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, #0\n\t" +#endif "str r8, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r7, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, 
#16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adc r10, r4, r10\n\t" - "# A[1] * A[4]\n\t" +#endif + /* A[1] * A[4] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[2] * A[3]\n\t" + "adc lr, r5, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, 
r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "adc lr, r5, lr\n\t" +#endif "str r9, [sp, #20]\n\t" - "# A[2] * A[4]\n\t" + /* A[2] * A[4] */ "ldr r7, [%[a], #16]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[1] * A[5]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, #0\n\t" +#endif + /* A[1] * A[5] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #20]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "# A[0] * A[6]\n\t" - "ldr r6, [%[a], #0]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, 
#16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + /* A[0] * A[6] */ + "ldr r6, [%[a]]\n\t" "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "str r10, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" - "ldr r7, [%[a], #28]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + "str r10, [sp, #24]\n\t" + /* A[0] * A[7] */ + "ldr r7, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, 
r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, #0\n\t" - "# A[1] * A[6]\n\t" +#endif + /* A[1] * A[6] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[2] * A[5]\n\t" +#endif + /* A[2] * A[5] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc 
r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "# A[3] * A[4]\n\t" +#endif + /* A[3] * A[4] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #28]\n\t" - "# A[3] * A[5]\n\t" +#endif + "str lr, [sp, #28]\n\t" + /* A[3] * A[5] */ "ldr r7, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, 
#16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, #0\n\t" - "# A[2] * A[6]\n\t" +#endif + /* A[2] * A[6] */ "ldr r6, [%[a], #8]\n\t" "ldr r7, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" - "# A[1] * A[7]\n\t" +#endif + /* A[1] * A[7] */ "ldr r6, [%[a], #4]\n\t" "ldr r7, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, r10\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, 
#0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, r10\n\t" +#endif "str r8, [sp, #32]\n\t" - "# A[2] * A[7]\n\t" + /* A[2] * A[7] */ "ldr r6, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, #0\n\t" - "# A[3] * A[6]\n\t" + "adc lr, r5, #0\n\t" +#endif + /* A[3] * A[6] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, 
#16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" - "# A[4] * A[5]\n\t" + "adc lr, r5, lr\n\t" +#endif + /* A[4] * A[5] */ "ldr r6, [%[a], #16]\n\t" "ldr r7, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adcs r10, r10, #0\n\t" + "adc lr, r5, lr\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adcs r10, r10, r4\n\t" + "adc lr, lr, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adcs r10, r4, r10\n\t" - "adc r14, r5, r14\n\t" + "adc lr, r5, lr\n\t" +#endif "str r9, [sp, #36]\n\t" - "# A[4] * A[6]\n\t" + /* A[4] * A[6] */ "ldr r7, [%[a], #24]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, #0\n\t" - "# A[3] * A[7]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + 
"adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, #0\n\t" +#endif + /* A[3] * A[7] */ "ldr r6, [%[a], #12]\n\t" "ldr r7, [%[a], #28]\n\t" - "umull r3, r4, r6, r7\n\t" - "adds r10, r3, r10\n\t" - "adcs r14, r4, r14\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r10, r10, r4\n\t" + "adcs lr, lr, #0\n\t" "adc r8, r5, r8\n\t" - "str r10, [sp, #40]\n\t" - "# A[4] * A[7]\n\t" - "ldr r6, [%[a], #16]\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, lr, r4\n\t" + "adc r8, r8, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds r10, r10, r3\n\t" + "adcs lr, r4, lr\n\t" + "adc r8, r5, r8\n\t" +#endif + "str r10, [sp, #40]\n\t" + /* A[4] * A[7] */ + "ldr r6, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + 
"mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else + "umull r3, r4, r6, r7\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, #0\n\t" - "# A[5] * A[6]\n\t" +#endif + /* A[5] * A[6] */ "ldr r6, [%[a], #20]\n\t" "ldr r7, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds lr, lr, r4\n\t" + "adcs r8, r8, #0\n\t" + "adc r9, r5, r9\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds lr, lr, r3\n\t" + "adcs r8, r8, r4\n\t" + "adc r9, r9, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r14, r3, r14\n\t" + "adds lr, lr, r3\n\t" "adcs r8, r4, r8\n\t" "adc r9, r5, r9\n\t" - "str r14, [sp, #44]\n\t" - "# A[5] * A[7]\n\t" +#endif + "str lr, [sp, #44]\n\t" + /* A[5] * A[7] */ "ldr r7, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, #0\n\t" + "adc r10, r5, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsl r4, 
r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r8, r3, r8\n\t" + "adds r8, r8, r3\n\t" "adcs r9, r4, r9\n\t" "adc r10, r5, #0\n\t" +#endif "str r8, [sp, #48]\n\t" - "# A[6] * A[7]\n\t" + /* A[6] * A[7] */ "ldr r6, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r3, r6, #16\n\t" + "lsl r4, r7, #16\n\t" + "lsr r3, r3, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r3, r4\n\t" + "adds r9, r9, r4\n\t" + "adc r10, r10, #0\n\t" + "lsr r4, r7, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" + "lsr r3, r6, #16\n\t" + "lsr r4, r7, #16\n\t" + "mul r4, r3, r4\n\t" + "add r10, r10, r4\n\t" + "lsl r4, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r3, r4, r3\n\t" + "lsr r4, r3, #16\n\t" + "lsl r3, r3, #16\n\t" + "adds r9, r9, r3\n\t" + "adc r10, r10, r4\n\t" +#else "umull r3, r4, r6, r7\n\t" - "adds r9, r3, r9\n\t" + "adds r9, r9, r3\n\t" "adc r10, r4, r10\n\t" +#endif "str r9, [sp, #52]\n\t" "str r10, [sp, #56]\n\t" - "# Double\n\t" + /* Double */ "ldr r4, [sp, #4]\n\t" "ldr r6, [sp, #8]\n\t" "ldr r7, [sp, #12]\n\t" "ldr r8, [sp, #16]\n\t" "ldr r9, [sp, #20]\n\t" "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" + "ldr lr, [sp, #28]\n\t" "ldr r12, [sp, #32]\n\t" "ldr r3, [sp, #36]\n\t" "adds r4, r4, r4\n\t" @@ -30390,7 +68543,7 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const "adcs r8, r8, r8\n\t" "adcs r9, r9, r9\n\t" "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" + "adcs lr, lr, lr\n\t" "adcs r12, r12, r12\n\t" "adcs r3, r3, r3\n\t" "str r4, [sp, #4]\n\t" @@ -30399,7 +68552,7 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const "str r8, [sp, #16]\n\t" "str r9, [sp, #20]\n\t" "str r10, [sp, #24]\n\t" - "str r14, [sp, 
#28]\n\t" + "str lr, [sp, #28]\n\t" "str r12, [sp, #32]\n\t" "str r3, [sp, #36]\n\t" "ldr r4, [sp, #40]\n\t" @@ -30422,241 +68575,349 @@ SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const "ldr r4, [sp, #4]\n\t" "ldr r5, [sp, #8]\n\t" "ldr r12, [sp, #12]\n\t" - "# A[0] * A[0]\n\t" - "ldr r6, [%[a], #0]\n\t" + /* A[0] * A[0] */ + "ldr r6, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" +#else "umull r8, r9, r6, r6\n\t" - "# A[1] * A[1]\n\t" +#endif + /* A[1] * A[1] */ "ldr r6, [%[a], #4]\n\t" - "umull r10, r14, r6, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" +#else + "umull r10, lr, r6, r6\n\t" +#endif "adds r9, r9, r4\n\t" "adcs r10, r10, r5\n\t" - "adcs r14, r14, r12\n\t" - "str r8, [sp, #0]\n\t" + "adcs lr, lr, r12\n\t" + "str r8, [sp]\n\t" "str r9, [sp, #4]\n\t" "str r10, [sp, #8]\n\t" - "str r14, [sp, #12]\n\t" + "str lr, [sp, #12]\n\t" "ldr r3, [sp, #16]\n\t" "ldr r4, [sp, #20]\n\t" "ldr r5, [sp, #24]\n\t" "ldr r12, [sp, #28]\n\t" - "# A[2] * A[2]\n\t" + /* A[2] * A[2] */ "ldr r6, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adcs r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" +#else "umull r8, r9, r6, r6\n\t" - "# A[3] * A[3]\n\t" +#endif + /* A[3] * A[3] */ "ldr r6, [%[a], #12]\n\t" - "umull r10, 
r14, r6, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" +#else + "umull r10, lr, r6, r6\n\t" +#endif +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "adcs r8, r8, r3\n\t" "adcs r9, r9, r4\n\t" +#else + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" +#endif "adcs r10, r10, r5\n\t" - "adcs r14, r14, r12\n\t" + "adcs lr, lr, r12\n\t" "str r8, [sp, #16]\n\t" "str r9, [sp, #20]\n\t" "str r10, [sp, #24]\n\t" - "str r14, [sp, #28]\n\t" + "str lr, [sp, #28]\n\t" "ldr r3, [sp, #32]\n\t" "ldr r4, [sp, #36]\n\t" "ldr r5, [sp, #40]\n\t" "ldr r12, [sp, #44]\n\t" - "# A[4] * A[4]\n\t" + /* A[4] * A[4] */ "ldr r6, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adcs r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" +#else "umull r8, r9, r6, r6\n\t" - "# A[5] * A[5]\n\t" +#endif + /* A[5] * A[5] */ "ldr r6, [%[a], #20]\n\t" - "umull r10, r14, r6, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" +#else + "umull r10, lr, r6, r6\n\t" +#endif +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "adcs r8, r8, r3\n\t" "adcs r9, r9, r4\n\t" +#else + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" +#endif "adcs r10, r10, r5\n\t" - "adcs r14, r14, r12\n\t" + "adcs lr, lr, r12\n\t" "str r8, [sp, #32]\n\t" "str r9, [sp, #36]\n\t" "str r10, 
[sp, #40]\n\t" - "str r14, [sp, #44]\n\t" + "str lr, [sp, #44]\n\t" "ldr r3, [sp, #48]\n\t" "ldr r4, [sp, #52]\n\t" "ldr r5, [sp, #56]\n\t" "ldr r12, [sp, #60]\n\t" - "# A[6] * A[6]\n\t" + /* A[6] * A[6] */ "ldr r6, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r8, r6, r6\n\t" + "mul r9, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adcs r8, r8, r6\n\t" + "adc r9, r9, r7\n\t" +#else "umull r8, r9, r6, r6\n\t" - "# A[7] * A[7]\n\t" +#endif + /* A[7] * A[7] */ "ldr r6, [%[a], #28]\n\t" - "umull r10, r14, r6, r6\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r10, r6, r6\n\t" + "mul lr, r7, r7\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #15\n\t" + "lsl r6, r6, #17\n\t" + "adds r10, r10, r6\n\t" + "adc lr, lr, r7\n\t" +#else + "umull r10, lr, r6, r6\n\t" +#endif +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "adcs r8, r8, r3\n\t" "adcs r9, r9, r4\n\t" +#else + "adds r8, r8, r3\n\t" + "adcs r9, r9, r4\n\t" +#endif "adcs r10, r10, r5\n\t" - "adc r14, r14, r12\n\t" + "adc lr, lr, r12\n\t" "str r8, [sp, #48]\n\t" "str r9, [sp, #52]\n\t" "str r10, [sp, #56]\n\t" - "str r14, [sp, #60]\n\t" - "# Start Reduction\n\t" - "ldr r4, [sp, #0]\n\t" + "str lr, [sp, #60]\n\t" + /* Start Reduction */ + "ldr r4, [sp]\n\t" "ldr r5, [sp, #4]\n\t" "ldr r6, [sp, #8]\n\t" "ldr r7, [sp, #12]\n\t" "ldr r8, [sp, #16]\n\t" "ldr r9, [sp, #20]\n\t" "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" - "# mu = a[0..7] + a[0..4] << 96 + (a[0..1] * 2) << 192\n\t" - "# - a[0] << 224\n\t" - "# + (a[0..1] * 2) << (6 * 32)\n\t" + "ldr lr, [sp, #28]\n\t" + /* mu = a[0..7] + a[0..4] << 96 + (a[0..1] * 2) << 192 */ + /* - a[0] << 224 */ + /* + (a[0..1] * 2) << (6 * 32) */ "adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" + "adc lr, lr, r5\n\t" 
"adds r10, r10, r4\n\t" - "adc r14, r14, r5\n\t" - "# - a[0] << (7 * 32)\n\t" - "sub r14, r14, r4\n\t" - "# + a[0..4] << (3 * 32)\n\t" + "adc lr, lr, r5\n\t" + /* - a[0] << (7 * 32) */ + "sub lr, lr, r4\n\t" + /* + a[0..4] << (3 * 32) */ "mov %[a], r7\n\t" "mov r12, r8\n\t" "adds r7, r7, r4\n\t" "adcs r8, r8, r5\n\t" "adcs r9, r9, r6\n\t" "adcs r10, r10, %[a]\n\t" - "adc r14, r14, r12\n\t" + "adc lr, lr, r12\n\t" "str r7, [sp, #12]\n\t" "str r8, [sp, #16]\n\t" "str r9, [sp, #20]\n\t" - "# a += mu * m\n\t" - "# += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1)\n\t" - "mov %[a], #0\n\t" - "# a[6] += t[0] + t[3]\n\t" - "ldr r3, [sp, #24]\n\t" - "adds r3, r3, r4\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r7\n\t" + /* a += mu * m */ + /* += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */ + /* a[6] += t[0] + t[3] */ + /* a[7] += t[1] + t[4] */ + "ldr %[a], [sp, #24]\n\t" + "ldr r2, [sp, #28]\n\t" + "adds %[a], %[a], r4\n\t" + "adcs r2, r2, r5\n\t" + "mov r12, #0\n\t" + "adc r12, r12, #0\n\t" + "adds %[a], %[a], r7\n\t" + "adcs r2, r2, r8\n\t" "adc r12, r12, #0\n\t" "str r10, [sp, #24]\n\t" - "# a[7] += t[1] + t[4]\n\t" - "ldr r3, [sp, #28]\n\t" - "adds r3, r3, r12\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r5\n\t" + "str lr, [sp, #28]\n\t" + "str r2, [sp, #64]\n\t" + /* a[8] += t[0] + t[2] + t[5] */ + /* a[9] += t[1] + t[3] + t[6] */ + /* a[10] += t[2] + t[4] + t[7] */ + "ldr %[a], [sp, #32]\n\t" + "ldr r2, [sp, #36]\n\t" + "ldr r3, [sp, #40]\n\t" + "adds %[a], %[a], r12\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "mov r12, #0\n\t" "adc r12, r12, #0\n\t" - "adds r3, r3, r8\n\t" + "adds %[a], %[a], r4\n\t" + "adcs r2, r2, r5\n\t" + "adcs r3, r3, r6\n\t" "adc r12, r12, #0\n\t" - "str r14, [sp, #28]\n\t" - "str r3, [sp, #64]\n\t" - "# a[8] += t[0] + t[2] + t[5]\n\t" - "ldr r3, [sp, #32]\n\t" - "adds r3, r3, r12\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r4\n\t" + "adds %[a], %[a], r6\n\t" + "adcs r2, r2, r7\n\t" + "adcs r3, 
r3, r8\n\t" "adc r12, r12, #0\n\t" - "adds r3, r3, r6\n\t" + "adds %[a], %[a], r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, lr\n\t" "adc r12, r12, #0\n\t" - "adds r3, r3, r9\n\t" - "adc r12, r12, #0\n\t" - "str r3, [sp, #32]\n\t" - "# a[9] += t[1] + t[3] + t[6]\n\t" - "# a[10] += t[2] + t[4] + t[7]\n\t" - "ldr r3, [sp, #36]\n\t" - "ldr r4, [sp, #40]\n\t" - "adds r3, r3, r12\n\t" - "adcs r4, r4, #0\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r5\n\t" - "adcs r4, r4, r6\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adc r12, r12, #0\n\t" - "adds r3, r3, r10\n\t" - "adcs r4, r4, r14\n\t" - "adc r12, r12, #0\n\t" - "str r3, [sp, #36]\n\t" - "str r4, [sp, #40]\n\t" - "# a[11] += t[3] + t[5]\n\t" - "# a[12] += t[4] + t[6]\n\t" - "# a[13] += t[5] + t[7]\n\t" - "# a[14] += t[6]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "adds r3, r3, r12\n\t" + "str %[a], [sp, #32]\n\t" + "str r2, [sp, #36]\n\t" + "str r3, [sp, #40]\n\t" + /* a[11] += t[3] + t[5] */ + /* a[12] += t[4] + t[6] */ + /* a[13] += t[5] + t[7] */ + /* a[14] += t[6] */ + /* a[15] += t[7] */ + "ldr %[a], [sp, #44]\n\t" + "ldr r2, [sp, #48]\n\t" + "ldr r3, [sp, #52]\n\t" + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" + "adds %[a], %[a], r12\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" "adcs r5, r5, #0\n\t" - "adcs r6, r6, #0\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" + "mov r12, #0\n\t" "adc r12, r12, #0\n\t" - "adds r3, r3, r9\n\t" + "adds %[a], %[a], r7\n\t" + "adcs r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" "adcs r4, r4, r10\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, #0\n\t" + "adcs r5, r5, lr\n\t" "adc r12, r12, #0\n\t" - "str r3, [sp, #44]\n\t" - "str r4, [sp, #48]\n\t" - "str r5, [sp, #52]\n\t" - "str r6, [sp, #56]\n\t" - "# a[15] += t[7]\n\t" - "ldr r3, [sp, #60]\n\t" - "adds r3, r3, 
r12\n\t" - "adc r12, %[a], #0\n\t" - "adds r3, r3, r14\n\t" + "adds %[a], %[a], r9\n\t" + "adcs r2, r2, r10\n\t" + "adcs r3, r3, lr\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" "adc r12, r12, #0\n\t" - "str r3, [sp, #60]\n\t" - "ldr r3, [sp, #64]\n\t" - "ldr r4, [sp, #32]\n\t" - "ldr r5, [sp, #36]\n\t" - "ldr r6, [sp, #40]\n\t" - "ldr r8, [sp, #0]\n\t" + "str %[a], [sp, #44]\n\t" + "str r2, [sp, #48]\n\t" + "str r3, [sp, #52]\n\t" + "str r4, [sp, #56]\n\t" + "str r5, [sp, #60]\n\t" + "ldr %[a], [sp, #64]\n\t" + "ldr r2, [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" + "ldr r4, [sp, #40]\n\t" + "ldr r8, [sp]\n\t" "ldr r9, [sp, #4]\n\t" "ldr r10, [sp, #8]\n\t" - "ldr r14, [sp, #12]\n\t" - "subs r3, r3, r8\n\t" - "sbcs r4, r4, r9\n\t" - "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "str r4, [sp, #32]\n\t" - "str r5, [sp, #36]\n\t" - "str r6, [sp, #40]\n\t" - "ldr r3, [sp, #44]\n\t" - "ldr r4, [sp, #48]\n\t" - "ldr r5, [sp, #52]\n\t" - "ldr r6, [sp, #56]\n\t" - "ldr r7, [sp, #60]\n\t" + "ldr lr, [sp, #12]\n\t" + "subs %[a], %[a], r8\n\t" + "sbcs r2, r2, r9\n\t" + "sbcs r3, r3, r10\n\t" + "sbcs r4, r4, lr\n\t" + "str r2, [sp, #32]\n\t" + "str r3, [sp, #36]\n\t" + "str r4, [sp, #40]\n\t" + "ldr %[a], [sp, #44]\n\t" + "ldr r2, [sp, #48]\n\t" + "ldr r3, [sp, #52]\n\t" + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #60]\n\t" "ldr r8, [sp, #16]\n\t" "ldr r9, [sp, #20]\n\t" "ldr r10, [sp, #24]\n\t" - "ldr r14, [sp, #28]\n\t" - "sbcs r3, r3, r8\n\t" - "sbcs r4, r4, r9\n\t" - "sbcs r5, r5, r10\n\t" - "sbcs r6, r6, r14\n\t" - "sbc r7, r7, #0\n\t" - "# mask m and sub from result if overflow\n\t" - "sub r12, %[a], r12\n\t" - "and %[a], r12, #1\n\t" - "ldr r8, [sp, #32]\n\t" - "ldr r9, [sp, #36]\n\t" - "ldr r10, [sp, #40]\n\t" - "subs r8, r8, r12\n\t" - "sbcs r9, r9, r12\n\t" - "sbcs r10, r10, r12\n\t" + "ldr lr, [sp, #28]\n\t" + "sbcs %[a], %[a], r8\n\t" + "sbcs r2, r2, r9\n\t" + "sbcs r3, r3, r10\n\t" + "sbcs r4, r4, lr\n\t" + "sbc r5, r5, #0\n\t" + /* mask m and sub from result 
if overflow */ + "rsb r12, r12, #0\n\t" + "and lr, r12, #1\n\t" + "ldr r6, [sp, #32]\n\t" + "ldr r7, [sp, #36]\n\t" + "ldr r8, [sp, #40]\n\t" + "subs r6, r6, r12\n\t" + "sbcs r7, r7, r12\n\t" + "sbcs r8, r8, r12\n\t" + "sbcs %[a], %[a], #0\n\t" + "sbcs r2, r2, #0\n\t" "sbcs r3, r3, #0\n\t" - "sbcs r4, r4, #0\n\t" - "sbcs r5, r5, #0\n\t" - "sbcs r6, r6, %[a]\n\t" - "sbc r7, r7, r12\n\t" - "str r8, [%[r], #0]\n\t" - "str r9, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "add sp, sp, #68\n\t" - : [a] "+r" (a) - : [r] "r" (r) - : "memory", "r8", "r9", "r10", "r14", "r3", "r4", "r5", "r6", "r7", "r12" + "sbcs r4, r4, lr\n\t" + "sbc r5, r5, r12\n\t" + "stm %[r]!, {r6, r7, r8}\n\t" + "stm %[r]!, {%[a], r2, r3, r4, r5}\n\t" + "add sp, sp, #0x44\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r12", "r8", "r9", "r10", "lr" ); + (void)m; + (void)mp; } #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) @@ -30764,134 +69025,126 @@ static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td) */ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #28\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #28\n\t" + "\n" + "L_sp_256_cmp_8_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - 
"subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_256_cmp_8_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it 
ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", 
"r12", "lr", "r4", "r5", "r6" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Normalize the values in each word to 32. @@ -30900,6 +69153,7 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) */ #define sp_256_norm_8(a) +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -30908,34 +69162,45 @@ static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #32\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_256_cond_sub_8_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #32\n\t" + "blt L_sp_256_cond_sub_8_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. 
+ * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -30964,16 +69229,16 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ +#ifndef WOLFSSL_SP_SMALL #define sp_256_mont_reduce_order_8 sp_256_mont_reduce_8 /* Reduce the number back to 256 bits using Montgomery reduction. @@ -30982,106 +69247,761 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_256_mont_reduce_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + 
"lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds 
r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" - "adc r5, 
r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" + /* a[i+7] += m[7] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" +#else + "ldr r11, [%[m], #28]\n\t" +#endif + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" - "ldr r9, [%[a], #32]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #32]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "ldr r10, [%[a], #32]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #32\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #32\n\t" + "blt L_sp_256_mont_reduce_8_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str 
lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); } +#else +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. + */ +static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + __asm__ __volatile__ ( + "mov r1, #0\n\t" + /* # i = 0 */ + "mov r8, #0\n\t" + "\n" + "L_sp_256_mont_reduce_8_word_%=: \n\t" + "mov r4, #0\n\t" + /* mu = a[i] * 1 (mp) = a[i] */ + "ldr r2, [%[a]]\n\t" + /* a[i+0] += -1 * mu */ + "mov r5, r2\n\t" + "str r4, [%[a]]\n\t" + /* a[i+1] += -1 * mu */ + "ldr r6, [%[a], #4]\n\t" + "mov r4, r2\n\t" + "subs r5, r5, r2\n\t" + "sbc r4, r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[a], #4]\n\t" + /* a[i+2] += -1 * mu */ + "ldr r6, [%[a], #8]\n\t" + "mov r5, r2\n\t" + "subs r4, r4, r2\n\t" + "sbc r5, r5, #0\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, #0\n\t" + "str r4, [%[a], #8]\n\t" + /* a[i+3] += 0 * mu */ + "ldr r6, [%[a], #12]\n\t" + "mov r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[a], #12]\n\t" + /* a[i+4] += 0 * mu */ + "ldr r6, [%[a], #16]\n\t" + "mov r5, #0\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, #0\n\t" + "str r4, [%[a], #16]\n\t" + /* a[i+5] += 0 * mu */ + "ldr r6, [%[a], #20]\n\t" + "mov r4, #0\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, #0\n\t" + "str r5, [%[a], #20]\n\t" + /* a[i+6] += 1 * mu */ + "ldr r6, [%[a], #24]\n\t" + "mov r5, #0\n\t" + "adds r4, r4, r2\n\t" + "adc r5, r5, #0\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, #0\n\t" + "str r4, [%[a], #24]\n\t" + /* a[i+7] += -1 * mu */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, 
[%[a], #32]\n\t" + "adds r4, r1, r2\n\t" + "mov r1, #0\n\t" + "adc r1, r1, r1\n\t" + "subs r5, r5, r2\n\t" + "sbcs r4, r4, #0\n\t" + "sbc r1, r1, #0\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r1, r1, #0\n\t" + "str r5, [%[a], #28]\n\t" + "str r4, [%[a], #32]\n\t" + /* i += 1 */ + "add r8, r8, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r8, #32\n\t" + "blt L_sp_256_mont_reduce_8_word_%=\n\t" + "mov r2, r1\n\t" + "sub r1, r1, #1\n\t" + "mvn r1, r1\n\t" + "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "sub %[a], %[a], #32\n\t" + "subs r4, r4, r1\n\t" + "sbcs r5, r5, r1\n\t" + "sbcs r6, r6, r1\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, r2\n\t" + "sbc r11, r11, r1\n\t" + "stm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); + (void)m; + (void)mp; +} + +/* Reduce the number back to 256 bits using Montgomery reduction. + * + * a A single precision number to reduce in place. + * m The single precision number representing the modulus. + * mp The digit representing the negative inverse of m mod 2^n. 
+ */ +static SP_NOINLINE void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m, sp_digit mp) +{ + __asm__ __volatile__ ( +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_256_mont_reduce_order_8_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else + "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, 
lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r7, [%[m], #28]\n\t" +#else + "ldr r11, [%[m], #28]\n\t" +#endif + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "ldr r10, [%[a], #32]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" + "add %[a], %[a], #4\n\t" + "cmp r9, #32\n\t" + "blt L_sp_256_mont_reduce_order_8_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), 
[m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + ); + sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - mp); +} + +#endif /* WOLFSSL_SP_SMALL */ /* Map the Montgomery form projective coordinate point to an affine point. * * r Resulting affine coordinate point. @@ -31130,67 +70050,39 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[b],#0]\n\t" - "ldr r9, [%[b],#4]\n\t" - "ldr r10, [%[b],#8]\n\t" - "ldr r14, [%[b],#12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "ldr r4, [%[a],#16]\n\t" - "ldr r5, [%[a],#20]\n\t" - "ldr r6, [%[a],#24]\n\t" - "ldr r7, [%[a],#28]\n\t" - "ldr r8, [%[b],#16]\n\t" - "ldr r9, [%[b],#20]\n\t" - "ldr r10, [%[b],#24]\n\t" - "ldr r14, [%[b],#28]\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r], {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" "adc r3, r12, #0\n\t" "sub r3, r12, r3\n\t" "and r12, r3, #1\n\t" - "ldr r8, [%[r],#0]\n\t" - "ldr r9, [%[r],#4]\n\t" - "ldr r10, 
[%[r],#8]\n\t" - "ldr r14, [%[r],#12]\n\t" - "subs r8, r8, r3\n\t" - "sbcs r9, r9, r3\n\t" - "sbcs r10, r10, r3\n\t" - "sbcs r14, r14, #0\n\t" - "sbcs r4, r4, #0\n\t" - "sbcs r5, r5, #0\n\t" - "sbcs r6, r6, r12\n\t" - "sbc r7, r7, r3\n\t" - "str r8, [%[r],#0]\n\t" - "str r9, [%[r],#4]\n\t" - "str r10, [%[r],#8]\n\t" - "str r14, [%[r],#12]\n\t" - "str r4, [%[r],#16]\n\t" - "str r5, [%[r],#20]\n\t" - "str r6, [%[r],#24]\n\t" - "str r7, [%[r],#28]\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, r3\n\t" + "sbcs r6, r6, r3\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, r12\n\t" + "sbc r11, r11, r3\n\t" + "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); } @@ -31202,18 +70094,9 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[a],#16]\n\t" - "ldr r9, [%[a],#20]\n\t" - "ldr r10, [%[a],#24]\n\t" - "ldr r14, [%[a],#28]\n\t" + "mov r3, #0\n\t" + "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adds r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -31221,29 +70104,22 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "adcs r8, r8, r8\n\t" "adcs r9, r9, r9\n\t" "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "adc r3, r12, #0\n\t" - "sub r3, r12, r3\n\t" - "and r12, r3, #1\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, r3\n\t" - "sbcs r6, r6, r3\n\t" + "adcs r11, r11, r11\n\t" + "adc r2, r3, #0\n\t" + 
"sub r2, r3, r2\n\t" + "and r3, r2, #1\n\t" + "subs r4, r4, r2\n\t" + "sbcs r5, r5, r2\n\t" + "sbcs r6, r6, r2\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12\n\t" - "sbc r14, r14, r3\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "str r8, [%[r],#16]\n\t" - "str r9, [%[r],#20]\n\t" - "str r10, [%[r],#24]\n\t" - "str r14, [%[r],#28]\n\t" + "sbcs r10, r10, r3\n\t" + "sbc r11, r11, r2\n\t" + "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); } @@ -31255,18 +70131,9 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[a],#16]\n\t" - "ldr r9, [%[a],#20]\n\t" - "ldr r10, [%[a],#24]\n\t" - "ldr r14, [%[a],#28]\n\t" + "mov r3, #0\n\t" + "ldm %[a], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adds r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" @@ -31274,73 +70141,60 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "adcs r8, r8, r8\n\t" "adcs r9, r9, r9\n\t" "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "adc r3, r12, #0\n\t" - "sub r3, r12, r3\n\t" - "and r12, r3, #1\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, r3\n\t" - "sbcs r6, r6, r3\n\t" + "adcs r11, r11, r11\n\t" + "adc r2, r3, #0\n\t" + "sub r2, r3, r2\n\t" + "and r3, r2, #1\n\t" + "subs r4, r4, r2\n\t" + "sbcs r5, r5, r2\n\t" + "sbcs r6, r6, r2\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12\n\t" - 
"sbc r14, r14, r3\n\t" - "str r8, [%[r],#16]\n\t" - "str r9, [%[r],#20]\n\t" - "str r10, [%[r],#24]\n\t" - "str r14, [%[r],#28]\n\t" - "mov r12, #0\n\t" - "ldr r8, [%[a],#0]\n\t" - "ldr r9, [%[a],#4]\n\t" - "ldr r10, [%[a],#8]\n\t" - "ldr r14, [%[a],#12]\n\t" + "sbcs r10, r10, r3\n\t" + "sbc r11, r11, r2\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else + "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else + "strd r10, r11, [%[r], #24]\n\t" +#endif + "mov r3, #0\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" "adds r8, r8, r4\n\t" "adcs r9, r9, r5\n\t" "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "str r8, [%[r],#0]\n\t" - "str r9, [%[r],#4]\n\t" - "str r10, [%[r],#8]\n\t" - "str r14, [%[r],#12]\n\t" - "ldr r8, [%[a],#16]\n\t" - "ldr r9, [%[a],#20]\n\t" - "ldr r10, [%[a],#24]\n\t" - "ldr r14, [%[a],#28]\n\t" - "ldr r4, [%[r],#16]\n\t" - "ldr r5, [%[r],#20]\n\t" - "ldr r6, [%[r],#24]\n\t" - "ldr r7, [%[r],#28]\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" "adcs r8, r8, r4\n\t" "adcs r9, r9, r5\n\t" "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "adc r3, r12, #0\n\t" - "sub r3, r12, r3\n\t" - "and r12, r3, #1\n\t" - "ldr r4, [%[r],#0]\n\t" - "ldr r5, [%[r],#4]\n\t" - "ldr r6, [%[r],#8]\n\t" - "ldr r7, [%[r],#12]\n\t" - "subs r4, r4, r3\n\t" - "sbcs r5, r5, r3\n\t" - "sbcs r6, r6, r3\n\t" + "adcs r11, r11, r7\n\t" + "sub %[r], %[r], #16\n\t" + "adc r2, r3, #0\n\t" + "sub r2, r3, r2\n\t" + "and r3, r2, #1\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "subs r4, r4, r2\n\t" + "sbcs r5, r5, r2\n\t" + "sbcs r6, r6, r2\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, #0\n\t" "sbcs r9, r9, #0\n\t" - "sbcs r10, r10, r12\n\t" - "sbc r14, r14, r3\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, 
[%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "str r8, [%[r],#16]\n\t" - "str r9, [%[r],#20]\n\t" - "str r10, [%[r],#24]\n\t" - "str r14, [%[r],#28]\n\t" + "sbcs r10, r10, r3\n\t" + "sbc r11, r11, r2\n\t" + "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); } @@ -31351,66 +70205,39 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( "mov r12, #0\n\t" - "ldr r4, [%[a],#0]\n\t" - "ldr r5, [%[a],#4]\n\t" - "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" - "ldr r8, [%[b],#0]\n\t" - "ldr r9, [%[b],#4]\n\t" - "ldr r10, [%[b],#8]\n\t" - "ldr r14, [%[b],#12]\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "str r4, [%[r],#0]\n\t" - "str r5, [%[r],#4]\n\t" - "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" - "ldr r4, [%[a],#16]\n\t" - "ldr r5, [%[a],#20]\n\t" - "ldr r6, [%[a],#24]\n\t" - "ldr r7, [%[a],#28]\n\t" - "ldr r8, [%[b],#16]\n\t" - "ldr r9, [%[b],#20]\n\t" - "ldr r10, [%[b],#24]\n\t" - "ldr r14, [%[b],#28]\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "subs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "sbcs r8, r8, r4\n\t" + 
"sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" "sbc r3, r12, #0\n\t" + "sub %[r], %[r], #16\n\t" "and r12, r3, #1\n\t" - "ldr r8, [%[r],#0]\n\t" - "ldr r9, [%[r],#4]\n\t" - "ldr r10, [%[r],#8]\n\t" - "ldr r14, [%[r],#12]\n\t" - "adds r8, r8, r3\n\t" - "adcs r9, r9, r3\n\t" - "adcs r10, r10, r3\n\t" - "adcs r14, r14, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "adcs r6, r6, r12\n\t" - "adc r7, r7, r3\n\t" - "str r8, [%[r],#0]\n\t" - "str r9, [%[r],#4]\n\t" - "str r10, [%[r],#8]\n\t" - "str r14, [%[r],#12]\n\t" - "str r4, [%[r],#16]\n\t" - "str r5, [%[r],#20]\n\t" - "str r6, [%[r],#24]\n\t" - "str r7, [%[r],#28]\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r3\n\t" + "adcs r6, r6, r3\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, r12\n\t" + "adc r11, r11, r3\n\t" + "stm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r3", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); } @@ -31425,45 +70252,32 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( "mov r10, #0\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" -#else - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[a], #8]\n\t" -#endif - "and r14, r4, #1\n\t" - "sub r8, r10, r14\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "and r3, r4, #1\n\t" + "sub r8, r10, r3\n\t" "and r9, r8, #1\n\t" "adds r4, r4, r8\n\t" "adcs r5, r5, r8\n\t" "adcs r6, r6, r8\n\t" "adcs r7, r7, r10\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str 
r7, [%[r], #12]\n\t" -#else - "strd r4, r5, [%[r], #0]\n\t" - "strd r6, r7, [%[r], #8]\n\t" -#endif + "stm %[r], {r4, r5, r6, r7}\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" +#else + "ldrd r4, r5, [%[a], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) "ldr r6, [%[a], #24]\n\t" "ldr r7, [%[a], #28]\n\t" #else - "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[a], #24]\n\t" #endif "adcs r4, r4, r10\n\t" "adcs r5, r5, r10\n\t" "adcs r6, r6, r9\n\t" "adcs r7, r7, r8\n\t" - "adc r14, r10, r10\n\t" + "adc r3, r10, r10\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" @@ -31471,26 +70285,21 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "orr r8, r8, r5, lsl #31\n\t" "orr r9, r9, r6, lsl #31\n\t" "orr r10, r10, r7, lsl #31\n\t" - "orr r11, r11, r14, lsl #31\n\t" - "mov r14, r4\n\t" + "orr r11, r11, r3, lsl #31\n\t" + "mov r3, r4\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) "str r8, [%[r], #16]\n\t" "str r9, [%[r], #20]\n\t" +#else + "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) "str r10, [%[r], #24]\n\t" "str r11, [%[r], #28]\n\t" #else - "strd r8, r9, [%[r], #16]\n\t" "strd r10, r11, [%[r], #24]\n\t" #endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[r], #0]\n\t" - "ldr r5, [%[r], #4]\n\t" - "ldr r6, [%[r], #8]\n\t" - "ldr r7, [%[r], #12]\n\t" -#else - "ldrd r4, r5, [%[r], #0]\n\t" - "ldrd r6, r7, [%[r], #8]\n\t" -#endif + "ldm %[r], {r4, r5, r6, r7}\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" @@ -31498,21 +70307,12 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "orr r8, r8, r5, lsl #31\n\t" "orr r9, r9, r6, lsl #31\n\t" "orr r10, r10, r7, lsl #31\n\t" - "orr r11, r11, r14, lsl #31\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - 
"str r8, [%[r], #0]\n\t" - "str r9, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r11, [%[r], #12]\n\t" -#else - "strd r8, r9, [%[r], #0]\n\t" - "strd r10, r11, [%[r], #8]\n\t" -#endif + "orr r11, r11, r3, lsl #31\n\t" + "stm %[r], {r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3" ); - } /* Double the Montgomery form projective point p. @@ -35000,21 +73800,21 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, static void sp_256_add_one_8(sp_digit* a) { __asm__ __volatile__ ( - "ldm %[a], {r2, r3, r4, r5}\n\t" - "adds r2, r2, #1\n\t" - "adcs r3, r3, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, r5}\n\t" - "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adds r1, r1, #1\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, r5}\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" : [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5" + : "memory", "r1", "r2", "r3", "r4" ); } @@ -35267,29 +74067,29 @@ int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out, */ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #32\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #32\n\t" + "\n" + "L_sp_256_sub_in_pkace_8_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + 
"sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_256_sub_in_pkace_8_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #else @@ -35300,134 +74100,447 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) */ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ 
+#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. */ -static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_256_mul_d_8_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" 
"adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add r9, r9, #4\n\t" "cmp r9, #32\n\t" - "blt 1b\n\t" + "blt L_sp_256_mul_d_8_word_%=\n\t" "str r3, [%[r], #32]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc 
r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + 
"lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" +#endif "str r4, [%[r]], #4\n\t" "str r5, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -35439,57 +74552,184 @@ static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, */ static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv 
r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_256_word_8_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_256_word_8_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" 
+#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* AND m into each word of a and store in r. * * r A single precision integer. 
@@ -36132,66 +75372,77 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r10, #0\n\t" - "mov r14, #0\n\t" + "mov r11, #0\n\t" + "mov r12, #0\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" + "ldr r2, [%[a], #16]\n\t" + "ldr r3, [%[a], #20]\n\t" #else - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[a], #24]\n\t" + "ldrd r2, r3, [%[a], #16]\n\t" #endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" +#else + "ldrd r4, r5, [%[a], #24]\n\t" +#endif + "lsr r6, r2, #1\n\t" + "lsr r7, r3, #1\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" - "lsr r10, r6, #1\n\t" - "lsr r11, r7, #1\n\t" + "orr r6, r6, r3, lsl #31\n\t" + "orr r7, r7, r4, lsl #31\n\t" "orr r8, r8, r5, lsl #31\n\t" - "orr r9, r9, r6, lsl #31\n\t" - "orr r10, r10, r7, lsl #31\n\t" - "orr r11, r11, r14, lsl #31\n\t" - "mov r14, r4\n\t" + "orr r9, r9, r12, lsl #31\n\t" + "mov r12, r2\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r8, [%[r], #16]\n\t" - "str r9, [%[r], #20]\n\t" - "str r10, [%[r], #24]\n\t" - "str r11, [%[r], #28]\n\t" + "str r6, [%[r], #16]\n\t" + "str r7, [%[r], #20]\n\t" #else - "strd r8, r9, [%[r], #16]\n\t" - "strd r10, r11, [%[r], #24]\n\t" + "strd r6, r7, [%[r], #16]\n\t" #endif #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[r], #0]\n\t" - "ldr r5, [%[r], #4]\n\t" - "ldr r6, [%[r], #8]\n\t" - "ldr r7, [%[r], #12]\n\t" + "str r8, [%[r], #24]\n\t" + "str r9, [%[r], #28]\n\t" #else - "ldrd r4, r5, [%[r], #0]\n\t" - "ldrd r6, r7, [%[r], #8]\n\t" + "strd r8, r9, [%[r], #24]\n\t" #endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else + "ldrd r2, r3, 
[%[a]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" +#else + "ldrd r4, r5, [%[a], #8]\n\t" +#endif + "lsr r6, r2, #1\n\t" + "lsr r7, r3, #1\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" - "lsr r10, r6, #1\n\t" - "lsr r11, r7, #1\n\t" + "orr r6, r6, r3, lsl #31\n\t" + "orr r7, r7, r4, lsl #31\n\t" "orr r8, r8, r5, lsl #31\n\t" - "orr r9, r9, r6, lsl #31\n\t" - "orr r10, r10, r7, lsl #31\n\t" - "orr r11, r11, r14, lsl #31\n\t" + "orr r9, r9, r12, lsl #31\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r8, [%[r], #0]\n\t" - "str r9, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r11, [%[r], #12]\n\t" + "str r6, [%[r]]\n\t" + "str r7, [%[r], #4]\n\t" #else - "strd r8, r9, [%[r], #0]\n\t" - "strd r10, r11, [%[r], #8]\n\t" + "strd r6, r7, [%[r]]\n\t" #endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else + "strd r8, r9, [%[r], #8]\n\t" +#endif + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "r11" ); - } /* Divide the number by 2 mod the modulus. (r = a / 2 % m) @@ -36200,468 +75451,525 @@ static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) * a Number to divide. * m Modulus. 
*/ -static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, - const sp_digit* m) +static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "mov r10, #0\n\t" - "ldr r3, [%[a], #0]\n\t" - "ands r9, r3, #1\n\t" - "beq 1f\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[m], #0]\n\t" - "ldr r8, [%[m], #4]\n\t" - "ldr r10, [%[m], #8]\n\t" - "ldr r14, [%[m], #12]\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r10\n\t" - "adcs r6, r6, r14\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r8, [%[m], #20]\n\t" - "ldr r10, [%[m], #24]\n\t" - "ldr r14, [%[m], #28]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r10\n\t" - "adcs r6, r6, r14\n\t" - "adc r9, r10, r10\n\t" - "b 2f\n\t" - "\n1:\n\t" - "ldr r3, [%[a], #16]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "\n2:\n\t" - "lsr r7, r3, #1\n\t" - "and r3, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "mov r9, r3\n\t" - "str r7, [%[r], #16]\n\t" - "str r8, [%[r], #20]\n\t" - "str r10, [%[r], #24]\n\t" - "str r14, [%[r], #28]\n\t" - "ldr r3, [%[r], #0]\n\t" - "ldr r4, [%[r], #4]\n\t" - "ldr r5, [%[r], #8]\n\t" - "ldr r6, [%[r], #12]\n\t" - "lsr r7, r3, #1\n\t" - "lsr r8, r4, #1\n\t" - "lsr r10, r5, #1\n\t" - "lsr r14, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" - "orr r8, r8, r5, lsl #31\n\t" - "orr r10, r10, r6, lsl #31\n\t" - "orr r14, r14, r9, lsl #31\n\t" - "str r7, [%[r], #0]\n\t" - "str r8, [%[r], #4]\n\t" - "str r10, [%[r], #8]\n\t" - "str r14, [%[r], 
#12]\n\t" + "mov r12, #0\n\t" + "ldr r4, [%[a]], #4\n\t" + "ands r3, r4, #1\n\t" + "beq L_sp_256_div2_mod_8_even_%=\n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "adc r3, r12, r12\n\t" + "b L_sp_256_div2_mod_8_div2_%=\n\t" + "\n" + "L_sp_256_div2_mod_8_even_%=: \n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #12]\n\t" + "ldr r5, [%[a], #16]\n\t" +#else + "ldrd r4, r5, [%[a], #12]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #24]\n\t" +#else + "ldrd r6, r7, [%[a], #20]\n\t" +#endif + "\n" + "L_sp_256_div2_mod_8_div2_%=: \n\t" + "lsr r8, r4, #1\n\t" + "and r4, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "lsr r10, r6, #1\n\t" + "lsr r11, r7, #1\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r6, lsl #31\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r3, lsl #31\n\t" + "mov r3, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" +#else + "strd r8, r9, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else + "strd r10, r11, [%[r], #24]\n\t" +#endif + "ldm %[r], {r4, r5, r6, r7}\n\t" + "lsr r8, r4, #1\n\t" + "lsr r9, r5, #1\n\t" + "lsr r10, r6, #1\n\t" + "lsr r11, r7, #1\n\t" + "orr r8, r8, r5, lsl #31\n\t" + "orr r9, r9, r6, lsl #31\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r3, lsl #31\n\t" + "stm %[r], {r8, r9, r10, r11}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", 
"r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); } #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) -static int sp_256_num_bits_8(sp_digit* a) +static const unsigned char L_sp_256_num_bits_8_table[] = { + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +}; + +static int sp_256_num_bits_8(const sp_digit* a) { - static const byte sp_num_bits_table[256] = { 
- 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - }; - const byte* table = sp_num_bits_table; - int r = 0; - __asm__ __volatile__ ( - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 307f\n\t" - "mov r3, #248\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n307:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 207f\n\t" - "mov r3, #240\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n207:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 107f\n\t" - "mov r3, #232\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n107:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #224\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 306f\n\t" - "mov r3, #216\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n306:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 206f\n\t" - "mov r3, #208\n\t" - "ldrb %[r], 
[%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n206:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 106f\n\t" - "mov r3, #200\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n106:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #192\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n6:\n\t" - "ldr r2, [%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 305f\n\t" - "mov r3, #184\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n305:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 205f\n\t" - "mov r3, #176\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n205:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 105f\n\t" - "mov r3, #168\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n105:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #160\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 304f\n\t" - "mov r3, #152\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n304:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 204f\n\t" - "mov r3, #144\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n204:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 104f\n\t" - "mov r3, #136\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n104:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #128\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "lsr r4, r2, 
#24\n\t" - "cmp r4, #0\n\t" - "beq 303f\n\t" - "mov r3, #120\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n303:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 203f\n\t" - "mov r3, #112\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n203:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 103f\n\t" - "mov r3, #104\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n103:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #96\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 302f\n\t" - "mov r3, #88\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n302:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 202f\n\t" - "mov r3, #80\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n202:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 102f\n\t" - "mov r3, #72\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n102:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #64\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 301f\n\t" - "mov r3, #56\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n301:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 201f\n\t" - "mov r3, #48\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n201:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 101f\n\t" - "mov r3, #40\n\t" - "ldrb %[r], [%[table], r4]\n\t" - 
"add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n101:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #32\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 300f\n\t" - "mov r3, #24\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n300:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 200f\n\t" - "mov r3, #16\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n200:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 100f\n\t" - "mov r3, #8\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n100:\n\t" - "and r4, r2, #0xff\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "\n9:\n\t" - : [r] "+r" (r) - : [a] "r" (a), [table] "r" (table) - : "r2", "r3", "r4" + "mov lr, %[L_sp_256_num_bits_8_table]\n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_7_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_7_3_%=\n\t" + "mov r2, #0xf8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_7_2_%=\n\t" + "mov r2, #0xf0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_7_1_%=\n\t" + "mov r2, #0xe8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xe0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_%=: \n\t" + "ldr r1, [%[a], 
#24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_6_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_6_3_%=\n\t" + "mov r2, #0xd8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_6_2_%=\n\t" + "mov r2, #0xd0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_6_1_%=\n\t" + "mov r2, #0xc8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xc0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_5_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_5_3_%=\n\t" + "mov r2, #0xb8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_5_2_%=\n\t" + "mov r2, #0xb0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_5_1_%=\n\t" + "mov r2, #0xa8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xa0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + 
"L_sp_256_num_bits_8_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_4_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_4_3_%=\n\t" + "mov r2, #0x98\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_4_2_%=\n\t" + "mov r2, #0x90\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_4_1_%=\n\t" + "mov r2, #0x88\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x80\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_3_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_3_3_%=\n\t" + "mov r2, #0x78\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_3_2_%=\n\t" + "mov r2, #0x70\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_3_1_%=\n\t" + "mov r2, #0x68\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x60\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b 
L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_2_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_2_3_%=\n\t" + "mov r2, #0x58\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_2_2_%=\n\t" + "mov r2, #0x50\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_2_1_%=\n\t" + "mov r2, #0x48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_1_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_1_3_%=\n\t" + "mov r2, #56\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_1_2_%=\n\t" + "mov r2, #48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_1_1_%=\n\t" + "mov r2, #40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #32\n\t" + "ldrb r12, [lr, r3]\n\t" + 
"add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_0_3_%=\n\t" + "mov r2, #24\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_0_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_0_2_%=\n\t" + "mov r2, #16\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_0_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_256_num_bits_8_0_1_%=\n\t" + "mov r2, #8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_0_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "ldrb r12, [lr, r3]\n\t" + "\n" + "L_sp_256_num_bits_8_9_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : [L_sp_256_num_bits_8_table] "r" (L_sp_256_num_bits_8_table) + : "memory", "r1", "r2", "r3", "r12", "lr" ); - - return r; + return (uint32_t)(size_t)a; } + #else -static int sp_256_num_bits_8(sp_digit* a) +static int sp_256_num_bits_8(const sp_digit* a) { - int r = 0; - __asm__ __volatile__ ( - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "mov r3, #256\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "mov r3, #224\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n6:\n\t" - "ldr r2, [%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "mov r3, #192\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "mov r3, #160\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "mov 
r3, #128\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "mov r3, #96\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "mov r3, #64\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 9f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "mov r3, #32\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "\n9:\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "r2", "r3" - ); - - return r; -} + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_7_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" #endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_6_%=\n\t" + "mov r2, #0xe0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_5_%=\n\t" + "mov r2, #0xc0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_4_%=\n\t" + "mov r2, #0xa0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_3_%=\n\t" + "mov r2, #0x80\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_2_%=\n\t" + "mov r2, #0x60\n\t" + "clz r12, r1\n\t" + "sub r12, 
r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_256_num_bits_8_1_%=\n\t" + "mov r2, #0x40\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_256_num_bits_8_9_%=\n\t" + "\n" + "L_sp_256_num_bits_8_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "mov r2, #32\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "\n" + "L_sp_256_num_bits_8_9_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr" + ); + return (uint32_t)(size_t)a; +} +#endif /* WOLFSSL_SP_ARM_ARCH && (WOLFSSL_SP_ARM_ARCH < 7) */ /* Non-constant time modular inversion. * * @param [out] r Resulting number. @@ -37755,54 +77063,81 @@ static const sp_digit p384_b[12] = { static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #96\n\t" + "sub sp, sp, #0x60\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" + "\n" + "L_sp_384_mul_12_outer_%=: \n\t" "subs r3, r5, #44\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_384_mul_12_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + 
"adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" "cmp r3, #48\n\t" - "beq 3f\n\t" + "beq L_sp_384_mul_12_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_384_mul_12_inner_%=\n\t" + "\n" + "L_sp_384_mul_12_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #88\n\t" - "ble 1b\n\t" + "cmp r5, #0x58\n\t" + "ble L_sp_384_mul_12_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_384_mul_12_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_384_mul_12_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -37818,980 +77153,5476 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) __asm__ __volatile__ ( "sub sp, sp, #48\n\t" "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, 
r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" 
+ "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" 
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl 
r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl 
r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, 
r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, 
r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# 
A[8] * B[0]\n\t" + /* A[8] * B[0] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], 
#8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, 
r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[8]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" 
+ "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #32]\n\t" - "# A[0] * B[9]\n\t" + /* A[0] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + 
"adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[1] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #36]\n\t" - "# A[10] * B[0]\n\t" + /* A[10] * B[0] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull 
r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[3] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# 
A[6] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, 
[%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, 
r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[10]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #40]\n\t" - "# A[0] * B[11]\n\t" + /* A[0] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #44]\n\t" - "# A[11] * B[1]\n\t" + /* A[11] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[10] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" 
"adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[5] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + 
/* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #36]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl 
r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #48]\n\t" - "# A[2] * B[11]\n\t" + /* A[2] * B[11] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr 
r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[8] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + 
"adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[5] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr 
r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[3] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #52]\n\t" - "# A[11] * B[3]\n\t" + /* A[11] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ "ldr r11, [%[a], #28]\n\t" "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[8] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, 
r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[11]\n\t" + "adc r4, 
r4, #0\n\t" +#endif + /* A[3] * B[11] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #56]\n\t" - "# A[4] * B[11]\n\t" + /* A[4] * B[11] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ "ldr 
r8, [%[a], #20]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl 
r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[6] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr 
r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #60]\n\t" - "# A[11] * B[5]\n\t" + /* A[11] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[10] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ "ldr r11, [%[a], #32]\n\t" "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #64]\n\t" - "# A[6] * B[11]\n\t" + /* A[6] * B[11] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, 
r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #68]\n\t" - "# A[11] * B[7]\n\t" + /* A[11] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, 
r10\n\t" - "# A[10] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ "ldr r11, [%[a], #36]\n\t" "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] 
*/ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #72]\n\t" - "# A[8] * B[11]\n\t" + /* A[8] * B[11] */ "ldr r8, [%[a], #32]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[10]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #76]\n\t" - "# A[11] * B[9]\n\t" + /* A[11] * B[9] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[10] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ "ldr r11, [%[a], #40]\n\t" "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, 
r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #80]\n\t" - "# A[10] * B[11]\n\t" + /* A[10] * B[11] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[11] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #84]\n\t" - "# A[11] * B[11]\n\t" + /* A[11] * B[11] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" +#endif "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" @@ -38800,9 +82631,8 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #48\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -38817,77 +82647,132 @@ static void sp_384_mul_12(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, 
sp, #96\n\t" + "sub sp, sp, #0x60\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" + "\n" + "L_sp_384_sqr_12_outer_%=: \n\t" "subs r3, r5, #44\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_384_sqr_12_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_384_sqr_12_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_384_sqr_12_op_done_%=\n\t" + "\n" + "L_sp_384_sqr_12_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" 
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_384_sqr_12_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" "cmp r3, #48\n\t" - "beq 3f\n\t" + "beq L_sp_384_sqr_12_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_384_sqr_12_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_384_sqr_12_inner_%=\n\t" + "\n" + "L_sp_384_sqr_12_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #88\n\t" - "ble 1b\n\t" + "cmp r5, #0x58\n\t" + "ble L_sp_384_sqr_12_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_384_sqr_12_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_384_sqr_12_store_%=\n\t" + : [r] 
"+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -38901,108 +82786,514 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( "sub sp, sp, #48\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, 
r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, 
r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, 
#0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, 
r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -39010,66 +83301,294 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) 
&& (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + 
"adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, 
r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -39077,80 +83596,370 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[0] * A[8]\n\t" + /* A[0] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, 
r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr 
r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #32]\n\t" - "# A[0] * A[9]\n\t" + /* A[0] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[8]\n\t" + /* A[1] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] 
* A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs 
r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -39158,94 +83967,446 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #36]\n\t" - "# A[0] * A[10]\n\t" + /* A[0] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" 
+ "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[9]\n\t" + /* A[1] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul 
r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + 
"adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #40]\n\t" - "# A[0] * A[11]\n\t" + /* A[0] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul 
r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[10]\n\t" + /* A[1] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, 
r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl 
r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -39253,87 +84414,408 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #44]\n\t" - "# A[1] * A[11]\n\t" + /* A[1] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[10]\n\t" + /* A[2] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + 
"adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, 
r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #48]\n\t" - "# A[2] * A[11]\n\t" + /* A[2] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, 
#16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[10]\n\t" + /* A[3] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, 
#16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, 
r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -39341,73 +84823,332 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #52]\n\t" - "# A[3] * A[11]\n\t" + /* A[3] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[4] * A[10]\n\t" + /* A[4] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs 
r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #56]\n\t" - "# A[4] * A[11]\n\t" + /* A[4] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + 
"lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[5] * A[10]\n\t" + /* A[5] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr 
r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -39415,59 +85156,256 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #60]\n\t" - "# A[5] * A[11]\n\t" + /* A[5] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[6] * A[10]\n\t" + /* A[6] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" 
+ "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #64]\n\t" - "# A[6] * A[11]\n\t" + /* A[6] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" 
+ "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[7] * A[10]\n\t" + /* A[7] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" 
"adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -39475,87 +85413,415 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #68]\n\t" - "# A[7] * A[11]\n\t" + /* A[7] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[8] * A[10]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, 
r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + /* r4 already holds the carry of the first add - do not clear it */ + "adc r4, r4, #0\n\t" +#endif + /* A[8] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[9] * A[9]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * A[9] */ "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" +
"mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #72]\n\t" - "# A[8] * A[11]\n\t" + /* A[8] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[9] * A[10]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + /* r2 already holds the carry of the first add - do not clear it */ + "adc r2, r2, #0\n\t" +#endif + /* A[9] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12,
#16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #76]\n\t" - "# A[9] * A[11]\n\t" + /* A[9] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[10] * A[10]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" 
+ "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * A[10] */ "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #80]\n\t" - "# A[10] * A[11]\n\t" + /* A[10] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + 
"adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #84]\n\t" - "# A[11] * A[11]\n\t" + /* A[11] * A[11] */ "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adc r4, r4, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adc r4, r4, r9\n\t" +#endif "str r3, [%[r], #88]\n\t" "str r4, [%[r], #92]\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" @@ -39564,10 +85830,9 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "stm %[r]!, {r2, r3, r4, r8}\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #48\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); } @@ -39579,15 +85844,14 @@ static void sp_384_sqr_12(sp_digit* r, 
const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #48\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #48\n\t" + "\n" + "L_sp_384_add_12_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -39596,15 +85860,15 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_384_add_12_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -39614,41 +85878,37 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -39659,31 +85919,30 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #48\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #48\n\t" + "\n" + "L_sp_384_sub_12_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_384_sub_12_word_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -39693,40 +85952,36 @@ static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -40024,6 +86279,7 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) return err; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. 
* @@ -40032,34 +86288,45 @@ static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #48\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_384_cond_sub_12_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #48\n\t" + "blt L_sp_384_cond_sub_12_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. 
+ */ +static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -40102,16 +86369,15 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digi "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ #define sp_384_mont_reduce_order_12 sp_384_mont_reduce_12 /* Reduce the number back to 384 bits using Montgomery reduction. @@ -40120,140 +86386,466 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digi * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_384_mont_reduce_12_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + 
"lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds 
r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, 
r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds 
r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" + /* a[i+11] += m[11] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" +#else + "ldr r11, [%[m], #44]\n\t" +#endif + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" - "ldr r9, [%[a], #48]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #48]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" + "ldr r10, [%[a], #48]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #48\n\t" - "blt 
1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "cmp r9, #48\n\t" + "blt L_sp_384_mont_reduce_12_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca); + sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -40407,178 +86999,170 @@ static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td) */ static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #44\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #44\n\t" + "\n" + "L_sp_384_cmp_12_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_384_cmp_12_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, 
r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" 
- "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, 
lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Normalize the values in each word to 32. @@ -40635,8 +87219,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -40683,35 +87266,33 @@ static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" + "mov lr, #0\n\t" "mov r6, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_384_cond_add_12_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r7, r7\n\t" - "str r4, [%[r], r6]\n\t" - "add r6, r6, #4\n\t" - "cmp r6, #48\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #48\n\t" + "blt L_sp_384_cond_add_12_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifndef WOLFSSL_SP_SMALL +#else /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -40720,138 +87301,61 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r8, #0\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" -#else - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" -#endif + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" -#else - "strd r4, r5, [%[r], #0]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" -#else - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" -#else - "strd r4, r5, [%[r], #8]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" -#else - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" -#else - "strd r4, r5, [%[r], 
#16]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" -#else - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #28]\n\t" -#else - "strd r4, r5, [%[r], #24]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" -#else - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" -#else - "strd r4, r5, [%[r], #32]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" -#else - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" -#else - "strd r4, r5, [%[r], #40]\n\t" -#endif - "adc %[c], r8, r8\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, 
r8\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* !WOLFSSL_SP_SMALL */ +#endif /* WOLFSSL_SP_SMALL */ /* Subtract two Montgomery form numbers (r = a - b % m). * * r Result of subtration. @@ -40859,8 +87363,7 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi * b Number to subtract with in Montgomery form. * m Modulus (prime). */ -static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { sp_digit o; @@ -40869,20 +87372,18 @@ static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b } #define sp_384_mont_sub_lower_12 sp_384_mont_sub_12 +#ifdef WOLFSSL_SP_SMALL +#else +#endif /* WOLFSSL_SP_SMALL */ static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r2, [%[a]]\n\t" - "ldr r3, [%[a], #4]\n\t" -#else - "ldrd r2, r3, [%[a]]\n\t" -#endif + "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" "orr r3, r3, r4, lsl #31\n\t" "lsr r4, r4, #1\n\t" "ldr r2, [%[a], #12]\n\t" @@ -40923,8 +87424,8 @@ static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) "lsr r4, r4, #1\n\t" "str r3, [%[r], #40]\n\t" "str r4, [%[r], #44]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4" ); } @@ -44487,27 +90988,27 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, static void sp_384_add_one_12(sp_digit* a) { __asm__ __volatile__ ( - "ldm %[a], {r2, r3, r4, r5}\n\t" - "adds r2, r2, #1\n\t" - "adcs r3, r3, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm 
%[a]!, {r2, r3, r4, r5}\n\t" - "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adds r1, r1, #1\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, r5}\n\t" - "ldm %[a], {r2, r3, r4, r5}\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adcs r1, r1, #0\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, r5}\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" : [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5" + : "memory", "r1", "r2", "r3", "r4" ); } @@ -44760,29 +91261,29 @@ int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out, */ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #48\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #48\n\t" + "\n" + "L_sp_384_sub_in_pkace_12_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_384_sub_in_pkace_12_word_%=\n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", 
"r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #else @@ -44793,169 +91294,614 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) */ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_384_mul_d_12_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add 
r9, r9, #4\n\t" "cmp r9, #48\n\t" - "blt 1b\n\t" + "blt L_sp_384_mul_d_12_word_%=\n\t" "str r3, [%[r], #48]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" +#endif "str r5, [%[r]], #4\n\t" "str r3, 
[%[r]]\n\t"
-        : [r] "+r" (r), [a] "+r" (a)
-        : [b] "r" (b)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10"
+        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
+        :
+        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10"
     );
-#endif
 }
 
+#endif /* WOLFSSL_SP_SMALL */
+#ifdef WOLFSSL_SP_USE_UDIV
 /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
  *
  * d1 The high order half of the number to divide.
@@ -44967,57 +91913,184 @@ static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
  */
 static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div)
 {
-    sp_digit r = 0;
-
     __asm__ __volatile__ (
-        "lsr r5, %[div], #1\n\t"
-        "add r5, r5, #1\n\t"
-        "mov r6, %[d0]\n\t"
-        "mov r7, %[d1]\n\t"
-        "# Do top 32\n\t"
-        "subs r8, r5, r7\n\t"
-        "sbc r8, r8, r8\n\t"
-        "add %[r], %[r], %[r]\n\t"
-        "sub %[r], %[r], r8\n\t"
-        "and r8, r8, r5\n\t"
-        "subs r7, r7, r8\n\t"
-        "# Next 30 bits\n\t"
-        "mov r4, #29\n\t"
-        "1:\n\t"
-        "movs r6, r6, lsl #1\n\t"
-        "adc r7, r7, r7\n\t"
-        "subs r8, r5, r7\n\t"
-        "sbc r8, r8, r8\n\t"
-        "add %[r], %[r], %[r]\n\t"
-        "sub %[r], %[r], r8\n\t"
-        "and r8, r8, r5\n\t"
-        "subs r7, r7, r8\n\t"
-        "subs r4, r4, #1\n\t"
-        "bpl 1b\n\t"
-        "add %[r], %[r], %[r]\n\t"
-        "add %[r], %[r], #1\n\t"
-        "umull r4, r5, %[r], %[div]\n\t"
-        "subs r4, %[d0], r4\n\t"
-        "sbc r5, %[d1], r5\n\t"
-        "add %[r], %[r], r5\n\t"
-        "umull r4, r5, %[r], %[div]\n\t"
-        "subs r4, %[d0], r4\n\t"
-        "sbc r5, %[d1], r5\n\t"
-        "add %[r], %[r], r5\n\t"
-        "umull r4, r5, %[r], %[div]\n\t"
-        "subs r4, %[d0], r4\n\t"
-        "sbc r5, %[d1], r5\n\t"
-        "add %[r], %[r], r5\n\t"
-        "subs r8, %[div], r4\n\t"
-        "sbc r8, r8, r8\n\t"
-        "sub %[r], %[r], r8\n\t"
-        : [r] "+r" (r)
-        : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
-        : "r4", "r5", "r6", "r7", "r8"
+        "lsr r6, %[div], #16\n\t"            /* r6 = div >> 16 */
+        "add lr, r6, #1\n\t"                 /* lr = (div >> 16) + 1, the divisor used by every udiv estimate */
+        "udiv r4, %[d1], lr\n\t"             /* r4 = d1 / lr */
+        "lsl r5, %[div], #16\n\t"            /* r5 = div << 16 */
+        "lsl r4, r4, #16\n\t"                /* move the first estimate into the top 16 bits of the quotient */
+        "umull r3, r12, %[div], r4\n\t"      /* r12:r3 = div * r4 */
+        "subs %[d0], %[d0], r3\n\t"
+        "sbc %[d1], %[d1], r12\n\t"          /* d1:d0 -= r12:r3 */
+        "subs r3, %[d1], lr\n\t"             /* carry set when d1 >= lr */
+        "sbc r7, r7, r7\n\t"                 /* r7 = 0 if carry set, -1 otherwise (old r7 value cancels out) */
+        "add r7, r7, #1\n\t"                 /* r7 = 1 if d1 >= lr, else 0 */
+        "rsb r8, r7, #0\n\t"                 /* r8 = -r7: all-ones mask or zero */
+        "lsl r7, r7, #16\n\t"                /* r7 = 0x10000 or 0 */
+        "and r5, r5, r8\n\t"
+        "and r6, r6, r8\n\t"                 /* r6:r5 = (div << 16) as 64 bits, or 0 */
+        "subs %[d0], %[d0], r5\n\t"
+        "add r4, r4, r7\n\t"                 /* quotient += 0x10000 when the mask is set */
+        "sbc %[d1], %[d1], r6\n\t"           /* d1:d0 -= r6:r5 (branch-free conditional subtract) */
+        "lsl r12, %[d1], #16\n\t"
+        "lsr r3, %[d0], #16\n\t"
+        "orr r3, r3, r12\n\t"                /* r3 = (d1 << 16) | (d0 >> 16) */
+        "udiv r3, r3, lr\n\t"
+        "add r4, r4, r3\n\t"                 /* accumulate second estimate */
+        "umull r3, r12, %[div], r3\n\t"
+        "subs %[d0], %[d0], r3\n\t"
+        "sbc %[d1], %[d1], r12\n\t"          /* d1:d0 -= div * estimate */
+        "lsl r12, %[d1], #16\n\t"
+        "lsr r3, %[d0], #16\n\t"
+        "orr r3, r3, r12\n\t"                /* r3 = (d1 << 16) | (d0 >> 16) */
+        "udiv r3, r3, lr\n\t"
+        "add r4, r4, r3\n\t"                 /* accumulate third estimate */
+        "mul r3, %[div], r3\n\t"
+        "sub %[d0], %[d0], r3\n\t"           /* only the low word is updated from here on */
+        "udiv r3, %[d0], %[div]\n\t"         /* last step divides by the full divisor */
+        "add %[d1], r4, r3\n\t"              /* d1 = accumulated quotient; returned below */
+        : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div)
+        :
+        : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" /* "cc": subs/sbc above change the condition flags */
     );
-    return r;
+    return (uint32_t)(size_t)d1;
 }
 
+#else
+/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
+ *
+ * d1 The high order half of the number to divide.
+ * d0 The low order half of the number to divide.
+ * div The divisor.
+ * returns the result of the division.
+ *
+ * Note that this is an approximate div. It may give an answer 1 larger.
+ */
+static sp_digit div_384_word_12(sp_digit d1, sp_digit d0, sp_digit div)
+{
+    __asm__ __volatile__ (
+        "lsr lr, %[div], #1\n\t"
+        "add lr, lr, #1\n\t"                 /* lr = (div >> 1) + 1 */
+        "mov r4, %[d0]\n\t"
+        "mov r5, %[d1]\n\t"                  /* r5:r4 = working copy of d1:d0 */
+        /* Do top 32 */
+        "subs r6, lr, r5\n\t"
+        "sbc r6, r6, r6\n\t"                 /* r6 = -1 if r5 > lr, else 0 */
+        "mov r3, #0\n\t"
+        "sub r3, r3, r6\n\t"                 /* r3 = first quotient bit (0 or 1) */
+        "and r6, r6, lr\n\t"
+        "subs r5, r5, r6\n\t"                /* r5 -= lr when the mask is set */
+        /* Next 30 bits */
+        "mov r12, #29\n\t"                   /* counter: loop body runs for r12 = 29 down to 0 */
+        "\n"
+    "L_div_384_word_12_bit_%=: \n\t"
+        "lsls r4, r4, #1\n\t"
+        "adc r5, r5, r5\n\t"                 /* shift r5:r4 left one bit */
+        "subs r6, lr, r5\n\t"
+        "sbc r6, r6, r6\n\t"                 /* r6 = -1 if r5 > lr, else 0 */
+        "add r3, r3, r3\n\t"
+        "sub r3, r3, r6\n\t"                 /* r3 = 2 * r3 + new bit */
+        "and r6, r6, lr\n\t"
+        "subs r5, r5, r6\n\t"                /* r5 -= lr when the mask is set */
+        "subs r12, r12, #1\n\t"
+        "bpl L_div_384_word_12_bit_%=\n\t"
+        "add r3, r3, r3\n\t"
+        "add r3, r3, #1\n\t"                 /* r3 = 2 * r3 + 1 */
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)
+        "lsl r7, r3, #16\n\t"                /* r5:r4 = r3 * div from 16-bit partial products (umull not used here) */
+        "lsl r4, %[div], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, r7, r4\n\t"                 /* low(r3) * low(div) */
+        "lsr r8, %[div], #16\n\t"
+        "mul r7, r8, r7\n\t"                 /* low(r3) * high(div) */
+        "lsr r5, r7, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "adds r4, r4, r7\n\t"
+        "adc r5, r5, #0\n\t"
+        "lsr r7, r3, #16\n\t"
+        "mul r8, r7, r8\n\t"                 /* high(r3) * high(div) */
+        "add r5, r5, r8\n\t"
+        "lsl r8, %[div], #16\n\t"
+        "lsr r8, r8, #16\n\t"
+        "mul r7, r8, r7\n\t"                 /* high(r3) * low(div) */
+        "lsr r8, r7, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "adds r4, r4, r7\n\t"
+        "adc r5, r5, r8\n\t"
+#else
+        "umull r4, r5, r3, %[div]\n\t"       /* r5:r4 = r3 * div */
+#endif
+        "subs r7, %[d0], r4\n\t"
+        "sbc r8, %[d1], r5\n\t"              /* r8:r7 = d1:d0 - r5:r4 */
+        "add r3, r3, r8\n\t"                 /* correct the estimate by the high word of the difference */
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)
+        "lsl r7, r3, #16\n\t"                /* r5:r4 = r3 * div from 16-bit partial products */
+        "lsl r4, %[div], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, r7, r4\n\t"
+        "lsr r8, %[div], #16\n\t"
+        "mul r7, r8, r7\n\t"
+        "lsr r5, r7, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "adds r4, r4, r7\n\t"
+        "adc r5, r5, #0\n\t"
+        "lsr r7, r3, #16\n\t"
+        "mul r8, r7, r8\n\t"
+        "add r5, r5, r8\n\t"
+        "lsl r8, %[div], #16\n\t"
+        "lsr r8, r8, #16\n\t"
+        "mul r7, r8, r7\n\t"
+        "lsr r8, r7, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "adds r4, r4, r7\n\t"
+        "adc r5, r5, r8\n\t"
+#else
+        "umull r4, r5, r3, %[div]\n\t"       /* r5:r4 = r3 * div */
+#endif
+        "subs r7, %[d0], r4\n\t"
+        "sbc r8, %[d1], r5\n\t"              /* r8:r7 = d1:d0 - r5:r4 */
+        "add r3, r3, r8\n\t"                 /* second correction */
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)
+        "lsl r7, r3, #16\n\t"                /* r5:r4 = r3 * div from 16-bit partial products */
+        "lsl r4, %[div], #16\n\t"
+        "lsr r7, r7, #16\n\t"
+        "lsr r4, r4, #16\n\t"
+        "mul r4, r7, r4\n\t"
+        "lsr r8, %[div], #16\n\t"
+        "mul r7, r8, r7\n\t"
+        "lsr r5, r7, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "adds r4, r4, r7\n\t"
+        "adc r5, r5, #0\n\t"
+        "lsr r7, r3, #16\n\t"
+        "mul r8, r7, r8\n\t"
+        "add r5, r5, r8\n\t"
+        "lsl r8, %[div], #16\n\t"
+        "lsr r8, r8, #16\n\t"
+        "mul r7, r8, r7\n\t"
+        "lsr r8, r7, #16\n\t"
+        "lsl r7, r7, #16\n\t"
+        "adds r4, r4, r7\n\t"
+        "adc r5, r5, r8\n\t"
+#else
+        "umull r4, r5, r3, %[div]\n\t"       /* r5:r4 = r3 * div */
+#endif
+        "subs r7, %[d0], r4\n\t"
+        "sbc r8, %[d1], r5\n\t"              /* r8:r7 = d1:d0 - r5:r4 */
+        "add r3, r3, r8\n\t"                 /* third correction */
+        "subs r6, %[div], r7\n\t"
+        "sbc r6, r6, r6\n\t"                 /* r6 = -1 if div < r7, else 0 */
+        "sub %[d1], r3, r6\n\t"              /* d1 = r3 + 1 when div < r7, else r3; returned below */
+        : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div)
+        :
+        : "memory", "cc", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" /* "cc": subs/lsls/adc/adds above change the condition flags */
+    );
+    return (uint32_t)(size_t)d1;
+}
+
+#endif
 /* AND m into each word of a and store in r.
  *
  * r A single precision integer.
@@ -45638,697 +92711,860 @@ int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng,
  * a Number to divide.
  * m Modulus.
*/
-static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a,
-    const sp_digit* m)
+static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
 {
     __asm__ __volatile__ (
-        "ldr r4, [%[a]]\n\t"
-        "ands r8, r4, #1\n\t"
-        "beq 1f\n\t"
-        "mov r12, #0\n\t"
-        "ldr r5, [%[a], #4]\n\t"
-        "ldr r6, [%[a], #8]\n\t"
-        "ldr r7, [%[a], #12]\n\t"
-        "ldr r8, [%[m], #0]\n\t"
-        "ldr r9, [%[m], #4]\n\t"
-        "ldr r10, [%[m], #8]\n\t"
-        "ldr r14, [%[m], #12]\n\t"
-        "adds r4, r4, r8\n\t"
-        "adcs r5, r5, r9\n\t"
-        "adcs r6, r6, r10\n\t"
-        "adcs r7, r7, r14\n\t"
-        "str r4, [%[r], #0]\n\t"
-        "str r5, [%[r], #4]\n\t"
-        "str r6, [%[r], #8]\n\t"
-        "str r7, [%[r], #12]\n\t"
-        "ldr r4, [%[a], #16]\n\t"
-        "ldr r5, [%[a], #20]\n\t"
-        "ldr r6, [%[a], #24]\n\t"
-        "ldr r7, [%[a], #28]\n\t"
-        "ldr r8, [%[m], #16]\n\t"
-        "ldr r9, [%[m], #20]\n\t"
-        "ldr r10, [%[m], #24]\n\t"
-        "ldr r14, [%[m], #28]\n\t"
-        "adcs r4, r4, r8\n\t"
-        "adcs r5, r5, r9\n\t"
-        "adcs r6, r6, r10\n\t"
-        "adcs r7, r7, r14\n\t"
-        "str r4, [%[r], #16]\n\t"
-        "str r5, [%[r], #20]\n\t"
-        "str r6, [%[r], #24]\n\t"
-        "str r7, [%[r], #28]\n\t"
-        "ldr r4, [%[a], #32]\n\t"
-        "ldr r5, [%[a], #36]\n\t"
-        "ldr r6, [%[a], #40]\n\t"
-        "ldr r7, [%[a], #44]\n\t"
-        "ldr r8, [%[m], #32]\n\t"
-        "ldr r9, [%[m], #36]\n\t"
-        "ldr r10, [%[m], #40]\n\t"
-        "ldr r14, [%[m], #44]\n\t"
-        "adcs r4, r4, r8\n\t"
-        "adcs r5, r5, r9\n\t"
-        "adcs r6, r6, r10\n\t"
-        "adcs r7, r7, r14\n\t"
-        "str r4, [%[r], #32]\n\t"
-        "str r5, [%[r], #36]\n\t"
-        "str r6, [%[r], #40]\n\t"
-        "str r7, [%[r], #44]\n\t"
-        "adc r8, r12, r12\n\t"
-        "b 2f\n\t"
-        "\n1:\n\t"
-        "ldr r5, [%[a], #2]\n\t"
-        "str r4, [%[r], #0]\n\t"
-        "str r5, [%[r], #2]\n\t"
-        "ldr r4, [%[a], #4]\n\t"
-        "ldr r5, [%[a], #6]\n\t"
-        "str r4, [%[r], #4]\n\t"
-        "str r5, [%[r], #6]\n\t"
-        "ldr r4, [%[a], #8]\n\t"
-        "ldr r5, [%[a], #10]\n\t"
-        "str r4, [%[r], #8]\n\t"
-        "str r5, [%[r], #10]\n\t"
-        "ldr r4, [%[a], #12]\n\t"
-        "ldr r5, [%[a], #14]\n\t"
-        "str r4, [%[r], #12]\n\t"
-        "str r5, [%[r], #14]\n\t"
-        "ldr r4, [%[a], #16]\n\t"
-        "ldr r5, [%[a], #18]\n\t"
-        "str r4, [%[r], #16]\n\t"
-        "str r5, [%[r], #18]\n\t"
-        "ldr r4, [%[a], #20]\n\t"
-        "ldr r5, [%[a], #22]\n\t"
-        "str r4, [%[r], #20]\n\t"
-        "str r5, [%[r], #22]\n\t"
-        "\n2:\n\t"
-        "ldr r3, [%[r]]\n\t"
-        "ldr r4, [%[r], #4]\n\t"
-        "lsr r3, r3, #1\n\t"
-        "orr r3, r3, r4, lsl #31\n\t"
-        "lsr r4, r4, #1\n\t"
-        "ldr r5, [%[a], #8]\n\t"
-        "str r3, [%[r], #0]\n\t"
-        "orr r4, r4, r5, lsl #31\n\t"
-        "lsr r5, r5, #1\n\t"
-        "ldr r3, [%[a], #12]\n\t"
-        "str r4, [%[r], #4]\n\t"
-        "orr r5, r5, r3, lsl #31\n\t"
-        "lsr r3, r3, #1\n\t"
-        "ldr r4, [%[a], #16]\n\t"
-        "str r5, [%[r], #8]\n\t"
-        "orr r3, r3, r4, lsl #31\n\t"
-        "lsr r4, r4, #1\n\t"
-        "ldr r5, [%[a], #20]\n\t"
-        "str r3, [%[r], #12]\n\t"
-        "orr r4, r4, r5, lsl #31\n\t"
-        "lsr r5, r5, #1\n\t"
-        "ldr r3, [%[a], #24]\n\t"
-        "str r4, [%[r], #16]\n\t"
-        "orr r5, r5, r3, lsl #31\n\t"
-        "lsr r3, r3, #1\n\t"
-        "ldr r4, [%[a], #28]\n\t"
-        "str r5, [%[r], #20]\n\t"
-        "orr r3, r3, r4, lsl #31\n\t"
-        "lsr r4, r4, #1\n\t"
-        "ldr r5, [%[a], #32]\n\t"
-        "str r3, [%[r], #24]\n\t"
-        "orr r4, r4, r5, lsl #31\n\t"
-        "lsr r5, r5, #1\n\t"
-        "ldr r3, [%[a], #36]\n\t"
-        "str r4, [%[r], #28]\n\t"
-        "orr r5, r5, r3, lsl #31\n\t"
-        "lsr r3, r3, #1\n\t"
-        "ldr r4, [%[a], #40]\n\t"
-        "str r5, [%[r], #32]\n\t"
-        "orr r3, r3, r4, lsl #31\n\t"
-        "lsr r4, r4, #1\n\t"
-        "ldr r5, [%[a], #44]\n\t"
-        "str r3, [%[r], #36]\n\t"
-        "orr r4, r4, r5, lsl #31\n\t"
-        "lsr r5, r5, #1\n\t"
-        "orr r5, r5, r8, lsl #31\n\t"
-        "str r4, [%[r], #40]\n\t"
-        "str r5, [%[r], #44]\n\t"
+        "ldr r4, [%[a]], #4\n\t"             /* r4 = a[0]; a advances by one word */
+        "ands r3, r4, #1\n\t"                /* r3 = a[0] & 1 */
+        "beq L_sp_384_div2_mod_12_even_%=\n\t" /* low bit clear: copy a to r without adding m */
+        "mov r12, #0\n\t"
+        "ldm %[a]!, {r5, r6, r7}\n\t"
+        "ldm %[m]!, {r8, r9, r10, r11}\n\t"
+        "adds r4, r4, r8\n\t"                /* words 0-3 of a + m, carry chained into the next groups */
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r11\n\t"
+        "stm %[r]!, {r4, r5, r6, r7}\n\t"
+        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
+        "ldm %[m]!, {r8, r9, r10, r11}\n\t"
+        "adcs r4, r4, r8\n\t"                /* words 4-7 */
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r11\n\t"
+        "stm %[r]!, {r4, r5, r6, r7}\n\t"
+        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
+        "ldm %[m]!, {r8, r9, r10, r11}\n\t"
+        "adcs r4, r4, r8\n\t"                /* words 8-11 */
+        "adcs r5, r5, r9\n\t"
+        "adcs r6, r6, r10\n\t"
+        "adcs r7, r7, r11\n\t"
+        "stm %[r]!, {r4, r5, r6, r7}\n\t"
+        "adc r3, r12, r12\n\t"               /* r3 = carry out of the 12-word add (r12 is 0) */
+        "b L_sp_384_div2_mod_12_div2_%=\n\t"
+        "\n"
+    "L_sp_384_div2_mod_12_even_%=: \n\t"
+        "ldm %[a]!, {r5, r6, r7}\n\t"        /* copy the 12 words of a to r; r3 is 0 on this path */
+        "stm %[r]!, {r4, r5, r6, r7}\n\t"
+        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
+        "stm %[r]!, {r4, r5, r6, r7}\n\t"
+        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
+        "stm %[r]!, {r4, r5, r6, r7}\n\t"
+        "\n"
+    "L_sp_384_div2_mod_12_div2_%=: \n\t"
+        "sub %[r], #48\n\t"                  /* rewind r over the 48 bytes stored above */
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
+        "ldr r8, [%[r]]\n\t"
+        "ldr r9, [%[r], #4]\n\t"
+#else
+        "ldrd r8, r9, [%[r]]\n\t"
+#endif
+        "lsr r8, r8, #1\n\t"                 /* shift r right one bit, word by word from the bottom */
+        "orr r8, r8, r9, lsl #31\n\t"        /* bit 0 of the next word becomes bit 31 of this one */
+        "lsr r9, r9, #1\n\t"
+        "ldr r10, [%[r], #8]\n\t"
+        "str r8, [%[r]]\n\t"
+        "orr r9, r9, r10, lsl #31\n\t"
+        "lsr r10, r10, #1\n\t"
+        "ldr r8, [%[r], #12]\n\t"
+        "str r9, [%[r], #4]\n\t"
+        "orr r10, r10, r8, lsl #31\n\t"
+        "lsr r8, r8, #1\n\t"
+        "ldr r9, [%[r], #16]\n\t"
+        "str r10, [%[r], #8]\n\t"
+        "orr r8, r8, r9, lsl #31\n\t"
+        "lsr r9, r9, #1\n\t"
+        "ldr r10, [%[r], #20]\n\t"
+        "str r8, [%[r], #12]\n\t"
+        "orr r9, r9, r10, lsl #31\n\t"
+        "lsr r10, r10, #1\n\t"
+        "ldr r8, [%[r], #24]\n\t"
+        "str r9, [%[r], #16]\n\t"
+        "orr r10, r10, r8, lsl #31\n\t"
+        "lsr r8, r8, #1\n\t"
+        "ldr r9, [%[r], #28]\n\t"
+        "str r10, [%[r], #20]\n\t"
+        "orr r8, r8, r9, lsl #31\n\t"
+        "lsr r9, r9, #1\n\t"
+        "ldr r10, [%[r], #32]\n\t"
+        "str r8, [%[r], #24]\n\t"
+        "orr r9, r9, r10, lsl #31\n\t"
+        "lsr r10, r10, #1\n\t"
+        "ldr r8, [%[r], #36]\n\t"
+        "str r9, [%[r], #28]\n\t"
+        "orr r10, r10, r8, lsl #31\n\t"
+        "lsr r8, r8, #1\n\t"
+        "ldr r9, [%[r], #40]\n\t"
+        "str r10, [%[r], #32]\n\t"
+        "orr r8, r8, r9, lsl #31\n\t"
+        "lsr r9, r9, #1\n\t"
+        "ldr r10, [%[r], #44]\n\t"
+        "str r8, [%[r], #36]\n\t"
+        "orr r9, r9, r10, lsl #31\n\t"
+        "lsr r10, r10, #1\n\t"
+        "orr r10, r10, r3, lsl #31\n\t"      /* r3 (carry of the add, or 0) becomes the top bit */
+        "str r9, [%[r], #40]\n\t"
+        "str r10, [%[r], #44]\n\t"
+        : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
         :
-        : [r] "r" (r), [a] "r" (a), [m] "r" (m)
-        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14"
+        : "memory", "cc", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" /* "cc": ands/adds/adcs above change the condition flags */
     );
 }
 
 #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
-static int sp_384_num_bits_12(sp_digit* a)
+static const unsigned char L_sp_384_num_bits_12_table[] = {
+    0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03,
+    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+    0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+    0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+    0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +}; + +static int sp_384_num_bits_12(const sp_digit* a) { - static const byte sp_num_bits_table[256] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - }; - const byte* table = sp_num_bits_table; - int r = 0; - __asm__ __volatile__ ( - "ldr r2, [%[a], #44]\n\t" - "cmp r2, #0\n\t" - "beq 11f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 311f\n\t" - "mov r3, #376\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n311:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 211f\n\t" - "mov r3, #368\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n211:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 111f\n\t" - "mov r3, #360\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n111:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #352\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n11:\n\t" - "ldr r2, [%[a], #40]\n\t" - "cmp r2, #0\n\t" - "beq 10f\n\t" - "lsr r4, r2, 
#24\n\t" - "cmp r4, #0\n\t" - "beq 310f\n\t" - "mov r3, #344\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n310:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 210f\n\t" - "mov r3, #336\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n210:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 110f\n\t" - "mov r3, #328\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n110:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #320\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n10:\n\t" - "ldr r2, [%[a], #36]\n\t" - "cmp r2, #0\n\t" - "beq 9f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 309f\n\t" - "mov r3, #312\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n309:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 209f\n\t" - "mov r3, #304\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n209:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 109f\n\t" - "mov r3, #296\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n109:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #288\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n9:\n\t" - "ldr r2, [%[a], #32]\n\t" - "cmp r2, #0\n\t" - "beq 8f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 308f\n\t" - "mov r3, #280\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n308:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 208f\n\t" - "mov r3, #272\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n208:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 108f\n\t" - "mov r3, #264\n\t" - "ldrb %[r], 
[%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n108:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #256\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n8:\n\t" - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 307f\n\t" - "mov r3, #248\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n307:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 207f\n\t" - "mov r3, #240\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n207:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 107f\n\t" - "mov r3, #232\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n107:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #224\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 306f\n\t" - "mov r3, #216\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n306:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 206f\n\t" - "mov r3, #208\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n206:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 106f\n\t" - "mov r3, #200\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n106:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #192\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n6:\n\t" - "ldr r2, [%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 305f\n\t" - "mov r3, #184\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n305:\n\t" - "lsr r4, r2, #16\n\t" - "and 
r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 205f\n\t" - "mov r3, #176\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n205:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 105f\n\t" - "mov r3, #168\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n105:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #160\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 304f\n\t" - "mov r3, #152\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n304:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 204f\n\t" - "mov r3, #144\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n204:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 104f\n\t" - "mov r3, #136\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n104:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #128\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 303f\n\t" - "mov r3, #120\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n303:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 203f\n\t" - "mov r3, #112\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n203:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 103f\n\t" - "mov r3, #104\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n103:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #96\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 
13f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 302f\n\t" - "mov r3, #88\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n302:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 202f\n\t" - "mov r3, #80\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n202:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 102f\n\t" - "mov r3, #72\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n102:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #64\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 301f\n\t" - "mov r3, #56\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n301:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 201f\n\t" - "mov r3, #48\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n201:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 101f\n\t" - "mov r3, #40\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n101:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #32\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 300f\n\t" - "mov r3, #24\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n300:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 200f\n\t" - "mov r3, #16\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n200:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - 
"beq 100f\n\t" - "mov r3, #8\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n100:\n\t" - "and r4, r2, #0xff\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "\n13:\n\t" - : [r] "+r" (r) - : [a] "r" (a), [table] "r" (table) - : "r2", "r3", "r4" - ); - - return r; -} + "mov lr, %[L_sp_384_num_bits_12_table]\n\t" + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_11_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_11_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x78\n\t" #else -static int sp_384_num_bits_12(sp_digit* a) -{ - int r = 0; - - __asm__ __volatile__ ( - "ldr r2, [%[a], #44]\n\t" - "cmp r2, #0\n\t" - "beq 11f\n\t" - "mov r3, #384\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n11:\n\t" - "ldr r2, [%[a], #40]\n\t" - "cmp r2, #0\n\t" - "beq 10f\n\t" - "mov r3, #352\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n10:\n\t" - "ldr r2, [%[a], #36]\n\t" - "cmp r2, #0\n\t" - "beq 9f\n\t" - "mov r3, #320\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n9:\n\t" - "ldr r2, [%[a], #32]\n\t" - "cmp r2, #0\n\t" - "beq 8f\n\t" - "mov r3, #288\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n8:\n\t" - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "mov r3, #256\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "mov r3, #224\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n6:\n\t" - "ldr r2, [%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "mov r3, #192\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "mov r3, #160\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - 
"\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "mov r3, #128\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "mov r3, #96\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "mov r3, #64\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 13f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "mov r3, #32\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "\n13:\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "r2", "r3" - ); - - return r; -} + "mov r2, #0x178\n\t" #endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_11_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x70\n\t" +#else + "mov r2, #0x170\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_11_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x68\n\t" +#else + "mov r2, #0x168\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_%=: \n\t" + "ldr r1, 
[%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_10_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_10_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x58\n\t" +#else + "mov r2, #0x158\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_10_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x50\n\t" +#else + "mov r2, #0x150\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_10_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x48\n\t" +#else + "mov r2, #0x148\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_9_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_9_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x38\n\t" +#else + "mov r2, #0x138\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add 
r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_9_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x30\n\t" +#else + "mov r2, #0x130\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_9_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x28\n\t" +#else + "mov r2, #0x128\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_8_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_8_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x18\n\t" +#else + "mov r2, #0x118\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_8_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x10\n\t" +#else + "mov r2, 
#0x110\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_8_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x8\n\t" +#else + "mov r2, #0x108\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_%=: \n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_7_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_7_3_%=\n\t" + "mov r2, #0xf8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_7_2_%=\n\t" + "mov r2, #0xf0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_7_1_%=\n\t" + "mov r2, #0xe8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xe0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq 
L_sp_384_num_bits_12_6_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_6_3_%=\n\t" + "mov r2, #0xd8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_6_2_%=\n\t" + "mov r2, #0xd0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_6_1_%=\n\t" + "mov r2, #0xc8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xc0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_5_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_5_3_%=\n\t" + "mov r2, #0xb8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_5_2_%=\n\t" + "mov r2, #0xb0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_5_1_%=\n\t" + "mov r2, #0xa8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xa0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + 
"L_sp_384_num_bits_12_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_4_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_4_3_%=\n\t" + "mov r2, #0x98\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_4_2_%=\n\t" + "mov r2, #0x90\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_4_1_%=\n\t" + "mov r2, #0x88\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x80\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_3_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_3_3_%=\n\t" + "mov r2, #0x78\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_3_2_%=\n\t" + "mov r2, #0x70\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_3_1_%=\n\t" + "mov r2, #0x68\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x60\n\t" + "ldrb r12, [lr, r3]\n\t" + 
"add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_2_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_2_3_%=\n\t" + "mov r2, #0x58\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_2_2_%=\n\t" + "mov r2, #0x50\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_2_1_%=\n\t" + "mov r2, #0x48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_1_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_1_3_%=\n\t" + "mov r2, #56\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_1_2_%=\n\t" + "mov r2, #48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_1_1_%=\n\t" + "mov r2, #40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_1_%=: \n\t" + "and r3, r1, 
#0xff\n\t" + "mov r2, #32\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_0_3_%=\n\t" + "mov r2, #24\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_0_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_0_2_%=\n\t" + "mov r2, #16\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_0_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_384_num_bits_12_0_1_%=\n\t" + "mov r2, #8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_0_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "ldrb r12, [lr, r3]\n\t" + "\n" + "L_sp_384_num_bits_12_13_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : [L_sp_384_num_bits_12_table] "r" (L_sp_384_num_bits_12_table) + : "memory", "r1", "r2", "r3", "r12", "lr" + ); + return (uint32_t)(size_t)a; +} +#else +/* CLZ-based variant of the bit count. Walks the twelve 32-bit words of a from byte offset 44 down to 0 and stops at the first non-zero word; for word index i the result is 32 * (i + 1) - clz(word). Word 0 is not tested for zero, so an all-zero input yields 32 - clz(0), which is 0 because ARM CLZ returns 32 for a zero operand. The count is moved into the a operand register and returned from there. NOTE(review): r3 and lr are in the clobber list although no instruction in this variant references them. */ static int sp_384_num_bits_12(const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_11_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + /* builds 0x180 (384) with mov/lsl/add instead of a single immediate mov. NOTE(review): the closing #endif comment suggests this function is the #else arm of the same ARCH < 7 test, which would make these inner ARCH < 7 branches unreachable - confirm. */ "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x80\n\t" +#else + "mov r2, #0x180\n\t" +#endif + "clz r12, r1\n\t" + /* r12 = 384 - leading zero count of word 11 */ "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_11_%=: \n\t" + "ldr r1, [%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_10_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, 
r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_9_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_8_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_8_%=: \n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_7_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_6_%=\n\t" + /* from word 6 down the base is below 0x100 and is loaded with one mov, with no ARCH split */ "mov r2, #0xe0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_5_%=\n\t" + "mov r2, #0xc0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_4_%=\n\t" + "mov r2, #0xa0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq 
L_sp_384_num_bits_12_3_%=\n\t" + "mov r2, #0x80\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_2_%=\n\t" + "mov r2, #0x60\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_384_num_bits_12_1_%=\n\t" + "mov r2, #0x40\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_384_num_bits_12_13_%=\n\t" + "\n" + "L_sp_384_num_bits_12_1_%=: \n\t" + /* lowest word: no zero test, execution falls through to the exit label */ "ldr r1, [%[a]]\n\t" + "mov r2, #32\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "\n" + "L_sp_384_num_bits_12_13_%=: \n\t" + /* common exit: hand the count back through the a operand */ "mov %[a], r12\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr" + ); + return (uint32_t)(size_t)a; +} + +#endif /* WOLFSSL_SP_ARM_ARCH && (WOLFSSL_SP_ARM_ARCH < 7) */ /* Non-constant time modular inversion. * * @param [out] r Resulting number. 
@@ -47468,61 +94704,84 @@ static const sp_digit p521_b[17] = { /* Looping column-wise multiply of two 17-word values. For each result byte offset r5 (0 to 0x80 in steps of 4) the inner loop adds every product a[r3] * b[r4] with r3 + r4 = r5 into the three-word accumulator r6:r7:r8, then the low word is stored to a 0x88-byte stack buffer and the accumulator is shifted down one word. The finished buffer is copied to r. */ static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #136\n\t" + "sub sp, sp, #0x88\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #64\n\t" + "\n" + "L_sp_521_mul_17_outer_%=: \n\t" + "subs r3, r5, #0x40\n\t" "it cc\n\t" "movcc r3, #0\n\t" /* r3 = max(r5 - 0x40, 0) is the first a offset of this column; r4 = r5 - r3 is the matching b offset */ "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_521_mul_17_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + /* this branch does not use umull: the 64-bit product is assembled from four 16x16 mul results on the halves of lr and r11 and added into r6:r7:r8 piece by piece */ "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #68\n\t" - "beq 3f\n\t" + /* 0x44 is 17 words: stop once the a offset runs off the end of the operand */ "cmp r3, #0x44\n\t" + "beq L_sp_521_mul_17_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_521_mul_17_inner_%=\n\t" + "\n" + "L_sp_521_mul_17_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #128\n\t" - "ble 1b\n\t" + "cmp r5, #0x80\n\t" + "ble L_sp_521_mul_17_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "ldr r6, 
[sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "add sp, sp, #8\n\t" - "add %[r], %[r], #8\n\t" - "subs r5, r5, #8\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + /* copy-out: r5 is 0x84 here; two words are moved first, then four per pass for eight passes, 34 words in all. The writeback forms advance both sp and r, so sp ends 0x88 above its value inside the loop, undoing the initial sub. */ "ldm sp!, {r6, r7}\n\t" + "stm %[r]!, {r6, r7}\n\t" + "sub r5, r5, #8\n\t" + "\n" + "L_sp_521_mul_17_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_521_mul_17_store_%=\n\t" + /* NOTE(review): a and b are declared read-write (+r) below although no instruction in this function writes either operand register - confirm this is intentional. */ : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -47536,1982 +94795,10993 @@ static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #68\n\t" + "sub sp, sp, #0x44\n\t" "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, 
r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" 
+ /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, 
[%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, 
r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, 
r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, 
r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, 
r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, 
r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, 
#0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# A[8] * B[0]\n\t" + /* A[8] * B[0] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, 
r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[8]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, 
r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #32]\n\t" - "# A[0] * B[9]\n\t" + /* A[0] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[1]\n\t" + "adc r5, r5, 
#0\n\t" +#endif + /* A[8] * B[1] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #36]\n\t" - "# A[10] * B[0]\n\t" + /* A[10] * B[0] */ "ldr 
r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[9] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) 
&& (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[3] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, 
#0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[10]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs 
r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #40]\n\t" - "# A[0] * B[11]\n\t" + /* A[0] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, 
r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc 
r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, 
r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# 
A[10] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #44]\n\t" - "# A[12] * 
B[0]\n\t" + /* A[12] * B[0] */ "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[11] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[1] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], 
#8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, 
#16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[5] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[12]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[12] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + 
"lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #48]\n\t" - "# A[0] * B[13]\n\t" + /* A[0] * B[13] */ "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[12]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[12] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[11] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[8] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, 
r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[5] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[3]\n\t" + "adc r3, r3, 
#0\n\t" +#endif + /* A[10] * B[3] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[1] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #4]\n\t" 
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[0] */ "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #52]\n\t" - "# A[14] * B[0]\n\t" + /* A[14] * B[0] */ "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[13] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[1] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[2] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[3] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, 
r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ "ldr r11, [%[a], #28]\n\t" "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[8] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[11] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[12] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[13] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[14]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[14] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #56]\n\t" - "# A[0] * B[15]\n\t" + /* A[0] * B[15] */ "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[14]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[14] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[13] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[12] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" 
"adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[11] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * 
B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ "ldr r8, [%[a], #32]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[6] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[3] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[2] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[1] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, 
r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[0] */ "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #60]\n\t" - "# A[16] * B[0]\n\t" + /* A[16] * B[0] */ "ldr r8, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, 
r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[15] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[1] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[2] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs 
r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[3] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[4] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[5] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ "ldr r11, [%[a], #32]\n\t" "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull 
r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[12] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" 
- "# A[3] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[13] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[14]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[14] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[15]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[15] */ "ldr r8, 
[%[a], #4]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[16]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[16] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #64]\n\t" - "# A[1] * B[16]\n\t" + /* A[1] * B[16] */ "ldr r8, [%[a], #4]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[2] * B[15]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[15] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[14]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[14] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[13] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[12] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, 
r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[11] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[5] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[4] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[3] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[15] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[2] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[16] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[16] * B[1] */ "ldr r8, [%[a], #64]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #68]\n\t" - "# A[16] * B[2]\n\t" + /* A[16] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[15] * B[3]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[3] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[4] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[5] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[6] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - 
"adc r5, r5, r10\n\t" - "# A[11] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[7] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ "ldr r11, 
[%[a], #36]\n\t" "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) 
&& (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[12] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[13] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, 
r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[14]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[14] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[15]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[15] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + 
"adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[16]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[16] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #72]\n\t" - "# A[3] * B[16]\n\t" + /* A[3] * B[16] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[4] * B[15]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[15] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[14]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[14] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[13] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[12] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr 
r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[11] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" 
+ "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[7] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[6] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" 
+#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[5] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[15] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[4] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc 
r3, r3, r10\n\t" - "# A[16] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[16] * B[3] */ "ldr r8, [%[a], #64]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #76]\n\t" - "# A[16] * B[4]\n\t" + /* A[16] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[15] * B[5]\n\t" + "mov r4, #0\n\t" + "adc 
r4, r4, #0\n\t" +#endif + /* A[15] * B[5] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[6] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[7] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], 
#28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[8] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[9] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, 
#16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ "ldr r11, [%[a], #40]\n\t" "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[12] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[13] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr 
r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[14]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[14] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[15]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[15] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[16]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[16] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #80]\n\t" - "# A[5] * B[16]\n\t" + /* A[5] * B[16] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[6] * B[15]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[15] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[14]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[14] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[13] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[12] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, 
r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[9] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[8] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[7] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[6] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[16] * B[5]\n\t" + "adc r5, 
r5, #0\n\t" +#endif + /* A[16] * B[5] */ "ldr r8, [%[a], #64]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #84]\n\t" - "# A[16] * B[6]\n\t" + /* A[16] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[15] * B[7]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[7] */ "ldr 
r8, [%[a], #60]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[8] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[9] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[10] */ "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[11] */ "ldr r11, [%[a], #44]\n\t" "ldr r12, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[12] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[13] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs 
r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[14]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[14] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[15]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[15] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[16]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[16] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #88]\n\t" - "# A[7] * B[16]\n\t" + /* A[7] * B[16] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, 
r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[8] * B[15]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[15] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[14]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[14] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, 
r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[13] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[12] */ "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" 
+ "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[11] */ "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[10] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[9] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[15] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[8] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" 
+#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[16] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[16] * B[7] */ "ldr r8, [%[a], #64]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #92]\n\t" - "# A[16] * B[8]\n\t" + /* A[16] * B[8] */ "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, 
r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[15] * B[9]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[9] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[10] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[11]\n\t" + 
"adc r5, r5, #0\n\t" +#endif + /* A[13] * B[11] */ "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[12] */ "ldr r11, [%[a], #48]\n\t" "ldr r12, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[13] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], 
#52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[14]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[14] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[15]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[15] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, 
r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[16]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[16] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #96]\n\t" - "# A[9] * B[16]\n\t" + /* A[9] * B[16] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + 
"mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[10] * B[15]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[15] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[14]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[14] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" 
+ "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[13] */ "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[12] */ "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, 
r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[11] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[15] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[10] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[16] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[16] * B[9] */ "ldr r8, [%[a], #64]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #100]\n\t" - "# A[16] * B[10]\n\t" + /* A[16] * B[10] */ "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[15] * B[11]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[11] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[12] */ "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[13] */ "ldr r11, [%[a], #52]\n\t" "ldr r12, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[14]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[14] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[15]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[15] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[16]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[16] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #104]\n\t" - "# A[11] * B[16]\n\t" + /* A[11] * B[16] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[12] * B[15]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[15] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, 
r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[14]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[14] */ "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[13] */ "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[12]\n\t" + "adc r5, r5, #0\n\t" 
+#endif + /* A[15] * B[12] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[16] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[16] * B[11] */ "ldr r8, [%[a], #64]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #108]\n\t" - "# A[16] * B[12]\n\t" + /* A[16] * B[12] */ "ldr r9, [%[b], #48]\n\t" 
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[15] * B[13]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[13] */ "ldr r8, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[14]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[14] */ "ldr r11, [%[a], #56]\n\t" "ldr r12, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) 
+ "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[15]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[15] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[16]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[16] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #112]\n\t" - "# A[13] * B[16]\n\t" + /* A[13] * B[16] */ "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[14] * B[15]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[15] */ "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" 
+ "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[15] * B[14]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[14] */ "ldr r8, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[16] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[16] * B[13] */ "ldr r8, [%[a], #64]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #116]\n\t" - "# A[16] * B[14]\n\t" + /* A[16] * B[14] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[15] * B[15]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[15] */ "ldr r11, [%[a], #60]\n\t" "ldr r12, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, 
r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[16]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[16] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #120]\n\t" - "# A[15] * B[16]\n\t" + /* A[15] * B[16] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[16] * B[15]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[16] * B[15] */ "ldr r8, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #124]\n\t" - "# A[16] * B[16]\n\t" + /* A[16] * B[16] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" +#endif "str r5, [%[r], #128]\n\t" "str r3, [%[r], #132]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" @@ -49524,9 +105794,8 @@ static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3}\n\t" "stm %[r]!, {r3}\n\t" - "sub %[r], %[r], #68\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -49541,84 +105810,135 @@ static void sp_521_mul_17(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #136\n\t" + "sub sp, sp, #0x88\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #64\n\t" + "\n" + "L_sp_521_sqr_17_outer_%=: \n\t" + "subs r3, r5, #0x40\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_521_sqr_17_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_521_sqr_17_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - 
"ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_521_sqr_17_op_done_%=\n\t" + "\n" + "L_sp_521_sqr_17_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_521_sqr_17_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #68\n\t" - "beq 3f\n\t" + "cmp r3, #0x44\n\t" + "beq L_sp_521_sqr_17_inner_done_%=\n\t" "cmp r3, r4\n\t" - "bgt 3f\n\t" + "bgt L_sp_521_sqr_17_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_521_sqr_17_inner_%=\n\t" + "\n" + "L_sp_521_sqr_17_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #128\n\t" - "ble 
1b\n\t" + "cmp r5, #0x80\n\t" + "ble L_sp_521_sqr_17_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "add sp, sp, #8\n\t" - "add %[r], %[r], #8\n\t" - "subs r5, r5, #8\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "ldm sp!, {r6, r7}\n\t" + "stm %[r]!, {r6, r7}\n\t" + "sub r5, r5, #8\n\t" + "\n" + "L_sp_521_sqr_17_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_521_sqr_17_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -49631,109 +105951,515 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #68\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + "sub sp, sp, #0x44\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] 
*/ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, 
#16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, 
r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul 
r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], 
#4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* 
A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -49741,66 +106467,294 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" 
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" 
+ "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -49808,80 +106762,370 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[0] * A[8]\n\t" + /* A[0] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, 
r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #32]\n\t" - "# A[0] * A[9]\n\t" + /* A[0] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, 
[%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[8]\n\t" + /* A[1] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -49889,94 +107133,446 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #36]\n\t" - "# A[0] * A[10]\n\t" + /* A[0] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[9]\n\t" + /* A[1] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #40]\n\t" - "# A[0] * A[11]\n\t" + /* A[0] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[10]\n\t" + /* A[1] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, 
r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -49984,108 +107580,522 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #44]\n\t" - "# A[0] * A[12]\n\t" + /* A[0] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[11]\n\t" + /* A[1] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], 
#4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], 
#12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * 
A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #48]\n\t" - "# A[0] * 
A[13]\n\t" + /* A[0] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[12]\n\t" + /* A[1] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull 
r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[7] */ "ldr r10, 
[%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50093,122 +108103,598 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #52]\n\t" - "# A[0] * A[14]\n\t" + /* A[0] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] 
* A[13]\n\t" + /* A[1] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, 
r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds 
r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #56]\n\t" - "# A[0] * A[15]\n\t" + /* A[0] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[14]\n\t" + /* A[1] * A[14] */ "ldr 
r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + 
"adc r7, r7, #0\n\t" +#endif + /* A[3] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, 
r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, 
r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50216,129 +108702,636 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #60]\n\t" - "# A[0] * A[16]\n\t" + /* A[0] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr 
r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[15]\n\t" + /* A[1] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl 
r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, 
r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + 
"adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #64]\n\t" - "# A[1] * A[16]\n\t" + /* A[1] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[15]\n\t" + /* A[2] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" 
+ "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc 
r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" 
+ "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50346,115 +109339,560 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #68]\n\t" - "# A[2] * A[16]\n\t" + /* A[2] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[15]\n\t" + /* A[3] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[14]\n\t" + "adc r7, r7, #0\n\t" 
+ "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, 
r7, r12\n\t" - "# A[6] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, 
#16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[9] */ "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + 
"mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #72]\n\t" - "# A[3] * A[16]\n\t" + /* A[3] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[4] * A[15]\n\t" + /* A[4] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, 
#16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# 
A[7] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, 
r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50462,101 +109900,484 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #76]\n\t" - "# A[4] * A[16]\n\t" + /* A[4] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" 
+ "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[5] * A[15]\n\t" + /* A[5] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + 
"lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, 
r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[10] */ "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + 
"adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #80]\n\t" - "# A[5] * A[16]\n\t" + /* A[5] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[6] * A[15]\n\t" + /* A[6] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, 
r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + 
"mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, 
#0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50564,87 +110385,408 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #84]\n\t" - "# A[6] * A[16]\n\t" + /* A[6] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[7] * A[15]\n\t" + /* A[7] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, 
r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + 
"lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[11] */ "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + 
"mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #88]\n\t" - "# A[7] * A[16]\n\t" + /* A[7] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[8] * A[15]\n\t" + /* A[8] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + 
"adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50652,73 +110794,332 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #92]\n\t" - "# A[8] * A[16]\n\t" + /* A[8] * A[16] */ "ldr r10, [%[a], 
#64]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[9] * A[15]\n\t" + /* A[9] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl 
r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[12] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[12] */ "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr 
r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #96]\n\t" - "# A[9] * A[16]\n\t" + /* A[9] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[10] * A[15]\n\t" + /* A[10] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, 
r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[12] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + 
"lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50726,59 +111127,256 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #100]\n\t" - "# A[10] * A[16]\n\t" + /* A[10] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[11] * A[15]\n\t" + /* A[11] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, 
#16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[12] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[13] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[13] * A[13] */ "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, 
#16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #104]\n\t" - "# A[11] * A[16]\n\t" + /* A[11] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[12] * A[15]\n\t" + /* A[12] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + 
"adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[13] * A[14]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[13] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -50786,87 +111384,415 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #108]\n\t" - "# A[12] * A[16]\n\t" + /* A[12] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, 
[%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[13] * A[15]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[13] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, 
r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[14] * A[14]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[14] * A[14] */ "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #112]\n\t" - "# A[13] * A[16]\n\t" + /* A[13] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr 
r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[14] * A[15]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, 
r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #116]\n\t" - "# A[14] * A[16]\n\t" + /* A[14] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[15] * A[15]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * A[15] */ "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov 
r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #120]\n\t" - "# A[15] * A[16]\n\t" + /* A[15] * A[16] */ "ldr r10, [%[a], #64]\n\t" - "ldr r8, [%[a], #60]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #124]\n\t" - 
"# A[16] * A[16]\n\t" + /* A[16] * A[16] */ "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adc r2, r2, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adc r2, r2, r9\n\t" +#endif "str r4, [%[r], #128]\n\t" "str r2, [%[r], #132]\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" @@ -50879,10 +111805,9 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "stm %[r]!, {r2, r3, r4, r8}\n\t" "ldm sp!, {r2}\n\t" "stm %[r]!, {r2}\n\t" - "sub %[r], %[r], #68\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); } @@ -50894,15 +111819,14 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #64\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x40\n\t" + "\n" + "L_sp_521_add_17_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -50911,22 +111835,21 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - "adds %[c], %[c], #-1\n\t" - "ldm %[a]!, {r4}\n\t" - "ldm %[b]!, {r8}\n\t" + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_521_add_17_word_%=\n\t" + "adds r3, r3, #-1\n\t" + "ldm %[a], {r4}\n\t" + "ldm %[b], {r8}\n\t" "adcs r4, r4, r8\n\t" "stm %[r]!, {r4}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc %[r], r4, #0\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -50936,52 +111859,48 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4}\n\t" - "ldm %[b]!, {r8}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" - "stm %[r]!, {r4}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3}\n\t" + "ldm %[b]!, {r7}\n\t" + "adcs r3, r3, r7\n\t" + "stm %[r]!, {r3}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : 
- : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -50992,37 +111911,35 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #64\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x40\n\t" + "\n" + "L_sp_521_sub_17_word_%=: \n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4}\n\t" - "ldm %[b]!, {r8}\n\t" - "sbcs r4, r4, r8\n\t" - "stm %[r]!, {r4}\n\t" - "sbc %[c], r7, r7\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_521_sub_17_word_%=\n\t" + "rsbs r12, r12, #0\n\t" + "ldm %[a]!, {r3}\n\t" + "ldm %[b]!, {r7}\n\t" + "sbcs r3, r3, r7\n\t" + "stm %[r]!, {r3}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" ); - - return c; + return (uint32_t)(size_t)r; } #else @@ -51032,51 +111949,47 @@ static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, * a A single precision integer. 
* b A single precision integer. */ -static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4}\n\t" - "ldm %[b]!, {r8}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" - "stm %[r]!, {r4}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3}\n\t" + "ldm %[b]!, {r7}\n\t" + "sbcs r3, r3, r7\n\t" + "stm %[r]!, {r3}\n\t" + "sbc %[r], r6, r6\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : 
"memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ @@ -51290,6 +112203,7 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) return err; } +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -51298,34 +112212,45 @@ static int sp_521_point_to_ecc_point_17(const sp_point_521* p, ecc_point* pm) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #68\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_521_cond_sub_17_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x44\n\t" + "blt L_sp_521_cond_sub_17_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. 
+ * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -51387,33 +112312,50 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digi "and r6, r6, %[m]\n\t" "sbcs r4, r4, r6\n\t" "str r4, [%[r]]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ /* Reduce the number back to 521 bits using Montgomery reduction. * * a A single precision number to reduce in place. * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) { - (void)mp; - (void)m; - __asm__ __volatile__ ( - "sub sp, sp, #68\n\t" + "sub sp, sp, #0x44\n\t" "mov r12, sp\n\t" - "add r14, %[a], #64\n\t" - "ldm r14!, {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10}\n\t" + /* Shift top down by 9 bits */ + "add lr, %[a], #0x40\n\t" + /* 0-7 */ + "ldm lr!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "lsr r1, r1, #9\n\t" + "orr r1, r1, r2, lsl #23\n\t" + "lsr r2, r2, #9\n\t" + "orr r2, r2, r3, lsl #23\n\t" + "lsr r3, r3, #9\n\t" + "orr r3, r3, r4, lsl #23\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "lsr r5, r5, #9\n\t" + "orr r5, r5, r6, lsl #23\n\t" + "lsr r6, r6, #9\n\t" + "orr r6, r6, r7, lsl #23\n\t" + "lsr r7, r7, #9\n\t" + "orr r7, r7, r8, lsl #23\n\t" + "lsr r8, r8, #9\n\t" + "orr r8, r8, r9, lsl #23\n\t" + "stm r12!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "mov r1, r9\n\t" + /* 8-16 */ + "ldm lr!, {r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "lsr r1, r1, #9\n\t" "orr r1, r1, r2, lsl #23\n\t" "lsr r2, r2, #9\n\t" @@ -51431,29 +112373,11 @@ SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, "lsr r8, r8, #9\n\t" "orr r8, r8, r9, lsl #23\n\t" "lsr r9, r9, #9\n\t" - "orr r9, r9, r10, lsl #23\n\t" "stm r12!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "mov r1, r10\n\t" - "ldm r14, {r2, r3, r4, r5, r6, r7, r8}\n\t" - "lsr r1, r1, #9\n\t" - "orr r1, r1, r2, lsl #23\n\t" - "lsr r2, r2, #9\n\t" - "orr r2, r2, r3, lsl #23\n\t" - "lsr r3, r3, #9\n\t" - "orr r3, r3, r4, lsl #23\n\t" - "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl #23\n\t" - "lsr r5, r5, #9\n\t" - "orr r5, r5, r6, lsl #23\n\t" - "lsr r6, r6, #9\n\t" - "orr r6, r6, r7, lsl #23\n\t" - "lsr r7, r7, #9\n\t" - "orr r7, r7, r8, lsl #23\n\t" - "lsr r8, r8, #9\n\t" - "stm r12!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" - "mov r14, sp\n\t" + /* Add top to bottom */ 
+ /* 0-5 */ "ldm %[a], {r1, r2, r3, r4, r5, r6}\n\t" - "ldm r14!, {r7, r8, r9, r10, r11, r12}\n\t" + "ldm sp!, {r7, r8, r9, r10, r11, r12}\n\t" "adds r1, r1, r7\n\t" "adcs r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" @@ -51461,8 +112385,9 @@ SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, "adcs r5, r5, r11\n\t" "adcs r6, r6, r12\n\t" "stm %[a]!, {r1, r2, r3, r4, r5, r6}\n\t" + /* 6-11 */ "ldm %[a], {r1, r2, r3, r4, r5, r6}\n\t" - "ldm r14!, {r7, r8, r9, r10, r11, r12}\n\t" + "ldm sp!, {r7, r8, r9, r10, r11, r12}\n\t" "adcs r1, r1, r7\n\t" "adcs r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" @@ -51470,54 +112395,56 @@ SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, "adcs r5, r5, r11\n\t" "adcs r6, r6, r12\n\t" "stm %[a]!, {r1, r2, r3, r4, r5, r6}\n\t" + /* 12-16 */ "ldm %[a], {r1, r2, r3, r4, r5}\n\t" - "ldm r14!, {r7, r8, r9, r10, r11}\n\t" -#if __ARM_ARCH <= 6 - "mov r14, #0x1\n\t" - "lsl r14, r14, #8\n\t" - "add r14, r14, #0xff\n\t" + "ldm sp!, {r7, r8, r9, r10, r11}\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov lr, #0x1\n\t" + "lsl lr, lr, #8\n\t" + "add lr, lr, #0xff\n\t" #else - "mov r14, #0x1ff\n\t" + "mov lr, #0x1ff\n\t" #endif - "and r5, r5, r14\n\t" + "and r5, r5, lr\n\t" "adcs r1, r1, r7\n\t" "adcs r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" "adcs r4, r4, r10\n\t" "adcs r5, r5, r11\n\t" "lsr r12, r5, #9\n\t" - "and r5, r5, r14\n\t" + "and r5, r5, lr\n\t" "stm %[a]!, {r1, r2, r3, r4, r5}\n\t" - "sub %[a], %[a], #68\n\t" - "mov r11, #0\n\t" - "ldm %[a], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + "sub %[a], %[a], #0x44\n\t" + /* Add overflow */ + /* 0-8 */ + "ldm %[a], {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "adds r1, r1, r12\n\t" - "adcs r2, r2, r11\n\t" - "adcs r3, r3, r11\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "adcs r9, r9, r11\n\t" - "stm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" - "ldm 
%[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" - "adcs r1, r1, r11\n\t" - "adcs r2, r2, r11\n\t" - "adcs r3, r3, r11\n\t" - "adcs r4, r4, r11\n\t" - "adcs r5, r5, r11\n\t" - "adcs r6, r6, r11\n\t" - "adcs r7, r7, r11\n\t" - "adcs r8, r8, r11\n\t" - "stm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" - "add sp, sp, #68\n\t" - "sub %[a], %[a], #68\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "stm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" + /* 9-16 */ + "ldm %[a], {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" + "adcs r1, r1, #0\n\t" + "adcs r2, r2, #0\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "stm %[a]!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr" ); - + (void)m; + (void)mp; } /* Reduce the number back to 521 bits using Montgomery reduction. @@ -51526,23 +112453,24 @@ SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "cmp r12, #64\n\t" - "bne L_521_mont_reduce_17_nomask\n\t" -#if __ARM_ARCH <= 6 +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + "cmp r9, #0x40\n\t" + "bne L_sp_521_mont_reduce_order_17_nomask_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) "mov r7, #0x1\n\t" "lsl r7, r7, #8\n\t" "add r7, r7, #0xff\n\t" @@ -51550,169 +112478,625 @@ SP_NOINLINE static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* "mov r7, #0x1ff\n\t" #endif "and r8, r8, r7\n\t" - "L_521_mont_reduce_17_nomask:\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" + "\n" + "L_sp_521_mont_reduce_order_17_nomask_%=: \n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" 
+ "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "str r12, [%[a]]\n\t" + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) 
&& (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" "adds r10, r10, r6\n\t" - "str r10, [%[a], #0]\n\t" - "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds 
r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl 
r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, 
r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, 
r5\n\t" + "str r10, [%[a], #44]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" + "lsr r11, r7, #16\n\t" + 
"mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+16] += m[16] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" +#else + "ldr r11, [%[m], #64]\n\t" +#endif + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r4, r4, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" - "ldr r9, [%[a], #68]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #68]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r5, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "ldr r10, [%[a], #68]\n\t" + "adcs r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #68\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, 
[%[a], #4]\n\t" + "cmp r9, #0x44\n\t" + "blt L_sp_521_mont_reduce_order_17_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" "sub %[a], %[a], #4\n\t" - "ldr r4, [%[a], #0]\n\t" + "ldr r4, [%[a]]\n\t" "ldr r5, [%[a], #4]\n\t" "lsr r4, r4, #9\n\t" "orr r4, r4, r5, lsl #23\n\t" @@ -51779,14 +113163,14 @@ SP_NOINLINE static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* "str r5, [%[a], #64]\n\t" "lsr r4, r4, #9\n\t" "str r4, [%[a], #68]\n\t" - "lsr %[ca], r4, #9\n\t" + "lsr r3, r4, #9\n\t" "add %[a], %[a], #4\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12", "r11" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - ca); + sp_521_cond_sub_17(a - 17, a, m, (sp_digit)0 - mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). 
@@ -51937,233 +113321,225 @@ static void sp_521_mont_inv_17(sp_digit* r, const sp_digit* a, sp_digit* td) */ static sp_int32 sp_521_cmp_17(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #64\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0x40\n\t" + "\n" + "L_sp_521_cmp_17_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_521_cmp_17_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it 
lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], #44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + 
"and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, 
r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], 
r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Normalize the values in each word to 32. @@ -52220,87 +113596,78 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, * b Second number to add in Montgomery form. * m Modulus (prime). 
*/ -static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4}\n\t" - "ldm %[b]!, {r8}\n\t" - "adcs r4, r4, r8\n\t" -#if __ARM_ARCH <= 6 - "mov r14, #0x1\n\t" - "lsl r14, r14, #8\n\t" - "add r14, r14, #0xff\n\t" + "mov r3, #0\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + 
"adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8}\n\t" + "ldm %[b]!, {r4}\n\t" + "adcs r8, r8, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r12, #0x1\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" #else - "mov r14, #0x1ff\n\t" + "mov r12, #0x1ff\n\t" #endif - "lsr r12, r4, #9\n\t" - "and r4, r4, r14\n\t" - "stm %[r]!, {r4}\n\t" - "sub %[r], %[r], #68\n\t" - "mov r14, #0\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "adds r4, r4, r12\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r14\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r14\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r14\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" + "lsr r3, r8, #9\n\t" + "and r8, r8, r12\n\t" + "stm %[r]!, {r8}\n\t" + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ldm %[r], {r4}\n\t" - "adcs r4, r4, r14\n\t" + "adcs r4, r4, #0\n\t" "stm %[r]!, 
{r4}\n\t" - "sub %[r], %[r], #68\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); } @@ -52312,82 +113679,67 @@ static void sp_521_mont_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b */ static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( - "mov r8, #0\n\t" - "ldm %[a]!, {r2, r3, r4, r5, r6, r7}\n\t" - "adds r2, r2, r2\n\t" - "adcs r3, r3, r3\n\t" + "mov r2, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adcs r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adcs r7, r7, r7\n\t" - "str r2, [%[r], #0]\n\t" - "str r3, [%[r], #4]\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" - "str r6, [%[r], #16]\n\t" - "stm %[r]!, {r2, r3, r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r2, r3, r4, r5, r6, r7}\n\t" - "adcs r2, r2, r2\n\t" - "adcs r3, r3, r3\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4}\n\t" "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "str r2, [%[r], #0]\n\t" - "str r3, [%[r], #4]\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" - "str r6, [%[r], #16]\n\t" - "stm %[r]!, {r2, r3, r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r2, r3, r4, r5, r6}\n\t" - "adcs r2, r2, r2\n\t" - "adcs r3, r3, r3\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" -#if 
__ARM_ARCH <= 6 - "mov r9, #0x1\n\t" - "lsl r9, r9, #8\n\t" - "add r9, r9, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r3, #0x1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #0xff\n\t" #else - "mov r9, #0x1ff\n\t" + "mov r3, #0x1ff\n\t" #endif - "lsr r8, r6, #9\n\t" - "and r6, r6, r9\n\t" - "stm %[r]!, {r2, r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #68\n\t" - "mov r9, #0\n\t" - "ldm %[r], {r2, r3, r4, r5, r6, r7}\n\t" - "adds r2, r2, r8\n\t" - "adcs r3, r3, r9\n\t" - "adcs r4, r4, r9\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r9\n\t" - "adcs r7, r7, r9\n\t" - "stm %[r]!, {r2, r3, r4, r5, r6, r7}\n\t" - "ldm %[r], {r2, r3, r4, r5, r6, r7}\n\t" - "adcs r2, r2, r9\n\t" - "adcs r3, r3, r9\n\t" - "adcs r4, r4, r9\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r9\n\t" - "adcs r7, r7, r9\n\t" - "stm %[r]!, {r2, r3, r4, r5, r6, r7}\n\t" - "ldm %[r], {r2, r3, r4, r5, r6}\n\t" - "adcs r2, r2, r9\n\t" - "adcs r3, r3, r9\n\t" - "adcs r4, r4, r9\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r9\n\t" - "stm %[r]!, {r2, r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #68\n\t" - "sub %[a], %[a], #68\n\t" - : [r] "+r" (r), [a] "+r" (a) + "lsr r2, r4, #9\n\t" + "and r4, r4, r3\n\t" + "stm %[r]!, {r4}\n\t" + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r2\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4}\n\t" + "adcs r4, r4, #0\n\t" + "stm %[r]!, {r4}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", 
"r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); } @@ -52399,113 +113751,87 @@ static void sp_521_mont_dbl_17(sp_digit* r, const sp_digit* a, const sp_digit* m */ static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "mov r2, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adds r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adcs r7, r7, r7\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adcs r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adcs r7, r7, r7\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ldm %[a]!, {r4}\n\t" "adcs r4, r4, r4\n\t" "stm %[r]!, {r4}\n\t" - "sub %[r], %[r], #68\n\t" - "sub %[a], %[a], #68\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "adds r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, 
r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8}\n\t" - "ldm %[a]!, {r4}\n\t" - "adcs r8, r8, r4\n\t" -#if __ARM_ARCH <= 6 - "mov r14, #0x1\n\t" - "lsl r14, r14, #8\n\t" - "add r14, r14, #0xff\n\t" -#else - "mov r14, #0x1ff\n\t" -#endif - "lsr r12, r8, #9\n\t" - "and r8, r8, r14\n\t" - "stm %[r]!, {r8}\n\t" - "sub %[r], %[r], #68\n\t" - "mov r14, #0\n\t" + "sub %[r], %[r], #0x44\n\t" + "sub %[a], %[a], #0x44\n\t" "ldm %[r], {r4, r5, r6, r7}\n\t" - "adds r4, r4, r12\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "ldm %[r], {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r14\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "ldm %[r], {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r14\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "ldm %[r], {r4, r5, r6, r7}\n\t" - "adcs r4, r4, r14\n\t" - "adcs r5, r5, r14\n\t" - "adcs r6, r6, r14\n\t" - "adcs r7, r7, r14\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, 
r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "ldm %[r], {r4}\n\t" - "adcs r4, r4, r14\n\t" + "ldm %[a]!, {r8}\n\t" + "adcs r4, r4, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r3, #0x1\n\t" + "lsl r3, r3, #8\n\t" + "add r3, r3, #0xff\n\t" +#else + "mov r3, #0x1ff\n\t" +#endif + "lsr r2, r4, #9\n\t" + "and r4, r4, r3\n\t" "stm %[r]!, {r4}\n\t" - "sub %[r], %[r], #68\n\t" - : [r] "+r" (r), [a] "+r" (a) + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r2\n\t" + "adcs r4, r4, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4}\n\t" + "adcs r4, r4, #0\n\t" + "stm %[r]!, {r4}\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r2", "r3" ); } @@ -52516,88 +113842,79 @@ static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ -static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { - (void)m; - __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4}\n\t" - "ldm %[b]!, {r8}\n\t" - "sbcs r4, r4, r8\n\t" -#if __ARM_ARCH <= 6 - "mov r14, #0x1\n\t" - "lsl r14, r14, #8\n\t" - "add r14, r14, #0xff\n\t" + "mov r3, #0\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "subs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "sbcs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + "sbcs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8, r9, r10, r11}\n\t" + "ldm %[b]!, {r4, r5, r6, r7}\n\t" + 
"sbcs r8, r8, r4\n\t" + "sbcs r9, r9, r5\n\t" + "sbcs r10, r10, r6\n\t" + "sbcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r8}\n\t" + "ldm %[b]!, {r4}\n\t" + "sbcs r8, r8, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r12, #0x1\n\t" + "lsl r12, r12, #8\n\t" + "add r12, r12, #0xff\n\t" #else - "mov r14, #0x1ff\n\t" + "mov r12, #0x1ff\n\t" #endif - "asr r12, r4, #9\n\t" - "and r4, r4, r14\n\t" - "neg r12, r12\n\t" - "stm %[r]!, {r4}\n\t" - "sub %[r], %[r], #68\n\t" - "mov r14, #0\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "subs r4, r4, r12\n\t" - "sbcs r5, r5, r14\n\t" - "sbcs r6, r6, r14\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "sbcs r4, r4, r14\n\t" - "sbcs r5, r5, r14\n\t" - "sbcs r6, r6, r14\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "sbcs r4, r4, r14\n\t" - "sbcs r5, r5, r14\n\t" - "sbcs r6, r6, r14\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "sbcs r4, r4, r14\n\t" - "sbcs r5, r5, r14\n\t" - "sbcs r6, r6, r14\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" + "asr r3, r8, #9\n\t" + "and r8, r8, r12\n\t" + "neg r3, r3\n\t" + "stm %[r]!, {r8}\n\t" + "sub %[r], %[r], #0x44\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, #0\n\t" + "sbcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, #0\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, #0\n\t" + "sbcs r10, r10, #0\n\t" + "sbcs r11, r11, #0\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ldm %[r], {r4}\n\t" - "sbcs r4, r4, 
r14\n\t" + "sbcs r4, r4, #0\n\t" "stm %[r]!, {r4}\n\t" - "sub %[r], %[r], #68\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); } @@ -52605,17 +113922,12 @@ static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r2, [%[a]]\n\t" - "ldr r3, [%[a], #4]\n\t" -#else - "ldrd r2, r3, [%[a]]\n\t" -#endif + "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" "orr r3, r3, r4, lsl #31\n\t" "lsr r4, r4, #1\n\t" "ldr r2, [%[a], #12]\n\t" @@ -52676,8 +113988,8 @@ static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) "lsr r3, r3, #1\n\t" "str r2, [%[r], #60]\n\t" "str r3, [%[r], #64]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4" ); } @@ -56862,36 +118174,36 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, static void sp_521_add_one_17(sp_digit* a) { __asm__ __volatile__ ( - "ldm %[a], {r2, r3, r4, r5}\n\t" - "adds r2, r2, #1\n\t" - "adcs r3, r3, #0\n\t" - "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, r5}\n\t" - "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adds r1, r1, #1\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, r5}\n\t" - "ldm %[a], {r2, r3, r4, r5}\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adcs r1, r1, #0\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, 
r5}\n\t" - "ldm %[a], {r2, r3, r4, r5}\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adcs r1, r1, #0\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "adcs r5, r5, #0\n\t" - "stm %[a]!, {r2, r3, r4, r5}\n\t" - "ldm %[a], {r2}\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1, r2, r3, r4}\n\t" + "adcs r1, r1, #0\n\t" "adcs r2, r2, #0\n\t" - "stm %[a]!, {r2}\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "stm %[a]!, {r1, r2, r3, r4}\n\t" + "ldm %[a], {r1}\n\t" + "adcs r1, r1, #0\n\t" + "stm %[a]!, {r1}\n\t" : [a] "+r" (a) : - : "memory", "r2", "r3", "r4", "r5" + : "memory", "r1", "r2", "r3", "r4" ); } @@ -57138,96 +118450,101 @@ int sp_ecc_secret_gen_521(const mp_int* priv, const ecc_point* pub, byte* out, static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "rsb r6, %[n], #32\n\t" + "rsb r12, %[n], #32\n\t" #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r2, [%[a]]\n\t" - "ldr r3, [%[a], #4]\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r5, [%[a], #4]\n\t" #else - "ldrd r2, r3, [%[a]]\n\t" + "ldrd r4, r5, [%[a]]\n\t" #endif - "lsr r2, r2, %[n]\n\t" - "lsl r5, r3, r6\n\t" - "lsr r3, r3, %[n]\n\t" - "orr r2, r2, r5\n\t" - "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" - "lsl r5, r4, r6\n\t" "lsr r4, r4, %[n]\n\t" - "orr r3, r3, r5\n\t" - "ldr r2, [%[a], #12]\n\t" - "str r3, [%[r], #4]\n\t" - "lsl r5, r2, r6\n\t" - "lsr r2, r2, %[n]\n\t" - "orr r4, r4, r5\n\t" - "ldr r3, [%[a], #16]\n\t" - "str r4, [%[r], #8]\n\t" - "lsl r5, r3, r6\n\t" - "lsr r3, r3, %[n]\n\t" - "orr r2, r2, r5\n\t" - "ldr r4, [%[a], #20]\n\t" - "str r2, [%[r], #12]\n\t" - "lsl r5, r4, r6\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #8]\n\t" + "str r4, [%[a]]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r5, [%[a], #4]\n\t" + "lsl r3, r4, r12\n\t" "lsr r4, 
r4, %[n]\n\t" - "orr r3, r3, r5\n\t" - "ldr r2, [%[a], #24]\n\t" - "str r3, [%[r], #16]\n\t" - "lsl r5, r2, r6\n\t" - "lsr r2, r2, %[n]\n\t" - "orr r4, r4, r5\n\t" - "ldr r3, [%[a], #28]\n\t" - "str r4, [%[r], #20]\n\t" - "lsl r5, r3, r6\n\t" - "lsr r3, r3, %[n]\n\t" - "orr r2, r2, r5\n\t" - "ldr r4, [%[a], #32]\n\t" - "str r2, [%[r], #24]\n\t" - "lsl r5, r4, r6\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #16]\n\t" + "str r6, [%[a], #8]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #20]\n\t" + "str r4, [%[a], #12]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r5, [%[a], #16]\n\t" + "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" - "orr r3, r3, r5\n\t" - "ldr r2, [%[a], #36]\n\t" - "str r3, [%[r], #28]\n\t" - "lsl r5, r2, r6\n\t" - "lsr r2, r2, %[n]\n\t" - "orr r4, r4, r5\n\t" - "ldr r3, [%[a], #40]\n\t" - "str r4, [%[r], #32]\n\t" - "lsl r5, r3, r6\n\t" - "lsr r3, r3, %[n]\n\t" - "orr r2, r2, r5\n\t" - "ldr r4, [%[a], #44]\n\t" - "str r2, [%[r], #36]\n\t" - "lsl r5, r4, r6\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #28]\n\t" + "str r6, [%[a], #20]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #32]\n\t" + "str r4, [%[a], #24]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r5, [%[a], #28]\n\t" + "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" - "orr r3, r3, r5\n\t" - "ldr r2, [%[a], #48]\n\t" - "str r3, [%[r], #40]\n\t" - "lsl r5, r2, r6\n\t" - "lsr r2, r2, %[n]\n\t" - "orr r4, r4, r5\n\t" - "ldr r3, [%[a], #52]\n\t" - "str r4, [%[r], #44]\n\t" - "lsl r5, r3, r6\n\t" - "lsr r3, r3, %[n]\n\t" - "orr r2, r2, r5\n\t" - "ldr r4, [%[a], #56]\n\t" - "str r2, [%[r], #48]\n\t" - "lsl r5, r4, r6\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #40]\n\t" + "str r6, [%[a], #32]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + 
"ldr r6, [%[a], #44]\n\t" + "str r4, [%[a], #36]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r5, [%[a], #40]\n\t" + "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" - "orr r3, r3, r5\n\t" - "ldr r2, [%[a], #60]\n\t" - "str r3, [%[r], #52]\n\t" - "lsl r5, r2, r6\n\t" - "lsr r2, r2, %[n]\n\t" - "orr r4, r4, r5\n\t" - "ldr r3, [%[a], #64]\n\t" - "str r4, [%[r], #56]\n\t" - "lsl r5, r3, r6\n\t" - "lsr r3, r3, %[n]\n\t" - "orr r2, r2, r5\n\t" - "strd r2, r3, [%[r], #60]\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #52]\n\t" + "str r6, [%[a], #44]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" + "ldr r6, [%[a], #56]\n\t" + "str r4, [%[a], #48]\n\t" + "lsl r3, r6, r12\n\t" + "lsr r6, r6, %[n]\n\t" + "orr r5, r5, r3\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r5, [%[a], #52]\n\t" + "lsl r3, r4, r12\n\t" + "lsr r4, r4, %[n]\n\t" + "orr r6, r6, r3\n\t" + "ldr r5, [%[a], #64]\n\t" + "str r6, [%[a], #56]\n\t" + "lsl r3, r5, r12\n\t" + "lsr r5, r5, %[n]\n\t" + "orr r4, r4, r3\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #60]\n\t" + "str r5, [%[r], #64]\n\t" +#else + "strd r4, r5, [%[r], #60]\n\t" +#endif + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12" ); } @@ -57238,326 +118555,326 @@ static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "rsb r6, %[n], #31\n\t" - "ldr r3, [%[a], #64]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #60]\n\t" - "str r4, [%[r], #68]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #56]\n\t" - "str r3, [%[r], #64]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + 
"ldr r5, [%[a], #64]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #60]\n\t" + "str r6, [%[r], #68]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #52]\n\t" - "str r2, [%[r], #60]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #48]\n\t" - "str r4, [%[r], #56]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #44]\n\t" - "str r3, [%[r], #52]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #56]\n\t" + "str r5, [%[r], #64]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #52]\n\t" + "str r4, [%[r], #60]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #48]\n\t" + "str r6, [%[r], #56]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #40]\n\t" - "str r2, [%[r], #48]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #36]\n\t" - "str r4, [%[r], #44]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #32]\n\t" - "str r3, [%[r], #40]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #44]\n\t" + "str r5, [%[r], #52]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #40]\n\t" + "str r4, [%[r], #48]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #36]\n\t" + "str r6, [%[r], #44]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - 
"ldr r3, [%[a], #28]\n\t" - "str r2, [%[r], #36]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #24]\n\t" - "str r4, [%[r], #32]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #20]\n\t" - "str r3, [%[r], #28]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #32]\n\t" + "str r5, [%[r], #40]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #28]\n\t" + "str r4, [%[r], #36]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #24]\n\t" + "str r6, [%[r], #32]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #16]\n\t" - "str r2, [%[r], #24]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #12]\n\t" - "str r4, [%[r], #20]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #8]\n\t" - "str r3, [%[r], #16]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #20]\n\t" + "str r5, [%[r], #28]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #16]\n\t" + "str r4, [%[r], #24]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #12]\n\t" + "str r6, [%[r], #20]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #4]\n\t" - "str r2, [%[r], #12]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #0]\n\t" - "str r4, [%[r], #8]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" 
- "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "str r2, [%[r], #0]\n\t" - "str r3, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #8]\n\t" + "str r5, [%[r], #16]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #4]\n\t" + "str r4, [%[r], #12]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a]]\n\t" + "str r6, [%[r], #8]\n\t" + "lsr r3, r4, #1\n\t" + "lsl r4, r4, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "str r4, [%[r]]\n\t" + "str r5, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12" ); } static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( - "rsb r6, %[n], #31\n\t" - "ldr r3, [%[a], #132]\n\t" - "lsr r4, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r4, r4, r6\n\t" - "ldr r2, [%[a], #128]\n\t" - "str r4, [%[r], #136]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #124]\n\t" - "str r3, [%[r], #132]\n\t" - "lsr r5, r4, #1\n\t" + "rsb r12, %[n], #31\n\t" + "ldr r5, [%[a], #132]\n\t" + "lsr r6, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r6, r6, r12\n\t" + "ldr r4, [%[a], #128]\n\t" + "str r6, [%[r], #136]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #120]\n\t" - "str r2, [%[r], #128]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #116]\n\t" - "str r4, [%[r], #124]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #112]\n\t" - "str r3, [%[r], #120]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, 
[%[a], #124]\n\t" + "str r5, [%[r], #132]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #120]\n\t" + "str r4, [%[r], #128]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #116]\n\t" + "str r6, [%[r], #124]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #108]\n\t" - "str r2, [%[r], #116]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #104]\n\t" - "str r4, [%[r], #112]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #100]\n\t" - "str r3, [%[r], #108]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #112]\n\t" + "str r5, [%[r], #120]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #108]\n\t" + "str r4, [%[r], #116]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #104]\n\t" + "str r6, [%[r], #112]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #96]\n\t" - "str r2, [%[r], #104]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #92]\n\t" - "str r4, [%[r], #100]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #88]\n\t" - "str r3, [%[r], #96]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #100]\n\t" + "str r5, [%[r], #108]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #96]\n\t" + "str r4, [%[r], #104]\n\t" + "lsr r3, r5, #1\n\t" + 
"lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #92]\n\t" + "str r6, [%[r], #100]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #84]\n\t" - "str r2, [%[r], #92]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #80]\n\t" - "str r4, [%[r], #88]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #76]\n\t" - "str r3, [%[r], #84]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #88]\n\t" + "str r5, [%[r], #96]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #84]\n\t" + "str r4, [%[r], #92]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #80]\n\t" + "str r6, [%[r], #88]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #72]\n\t" - "str r2, [%[r], #80]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #68]\n\t" - "str r4, [%[r], #76]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #64]\n\t" - "str r3, [%[r], #72]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #76]\n\t" + "str r5, [%[r], #84]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #72]\n\t" + "str r4, [%[r], #80]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #68]\n\t" + "str r6, [%[r], #76]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #60]\n\t" 
- "str r2, [%[r], #68]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #56]\n\t" - "str r4, [%[r], #64]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #52]\n\t" - "str r3, [%[r], #60]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #64]\n\t" + "str r5, [%[r], #72]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #60]\n\t" + "str r4, [%[r], #68]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #56]\n\t" + "str r6, [%[r], #64]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #48]\n\t" - "str r2, [%[r], #56]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #44]\n\t" - "str r4, [%[r], #52]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r3, [%[r], #48]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #52]\n\t" + "str r5, [%[r], #60]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #48]\n\t" + "str r4, [%[r], #56]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #44]\n\t" + "str r6, [%[r], #52]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r2, [%[r], #44]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #32]\n\t" - "str r4, [%[r], #40]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, 
r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r3, [%[r], #36]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #40]\n\t" + "str r5, [%[r], #48]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #36]\n\t" + "str r4, [%[r], #44]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #32]\n\t" + "str r6, [%[r], #40]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r2, [%[r], #32]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #20]\n\t" - "str r4, [%[r], #28]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r3, [%[r], #24]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #28]\n\t" + "str r5, [%[r], #36]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #24]\n\t" + "str r4, [%[r], #32]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #20]\n\t" + "str r6, [%[r], #28]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #12]\n\t" - "str r2, [%[r], #20]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "ldr r2, [%[a], #8]\n\t" - "str r4, [%[r], #16]\n\t" - "lsr r5, r2, #1\n\t" - "lsl r2, r2, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r3, r3, r5\n\t" - "ldr r4, [%[a], #4]\n\t" - "str r3, [%[r], #12]\n\t" - "lsr r5, r4, #1\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #16]\n\t" + "str r5, [%[r], #24]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, 
r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a], #12]\n\t" + "str r4, [%[r], #20]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "ldr r4, [%[a], #8]\n\t" + "str r6, [%[r], #16]\n\t" + "lsr r3, r4, #1\n\t" "lsl r4, r4, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r2, r2, r5\n\t" - "ldr r3, [%[a], #0]\n\t" - "str r2, [%[r], #8]\n\t" - "lsr r5, r3, #1\n\t" - "lsl r3, r3, %[n]\n\t" - "lsr r5, r5, r6\n\t" - "orr r4, r4, r5\n\t" - "str r3, [%[r], #0]\n\t" - "str r4, [%[r], #4]\n\t" + "lsr r3, r3, r12\n\t" + "orr r5, r5, r3\n\t" + "ldr r6, [%[a], #4]\n\t" + "str r5, [%[r], #12]\n\t" + "lsr r3, r6, #1\n\t" + "lsl r6, r6, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r4, r4, r3\n\t" + "ldr r5, [%[a]]\n\t" + "str r4, [%[r], #8]\n\t" + "lsr r3, r5, #1\n\t" + "lsl r5, r5, %[n]\n\t" + "lsr r3, r3, r12\n\t" + "orr r6, r6, r3\n\t" + "str r5, [%[r]]\n\t" + "str r6, [%[r], #4]\n\t" + : [r] "+r" (r), [a] "+r" (a), [n] "+r" (n) : - : [r] "r" (r), [a] "r" (a), [n] "r" (n) - : "memory", "r2", "r3", "r4", "r5", "r6" + : "memory", "r4", "r5", "r6", "r3", "r12" ); } @@ -57569,35 +118886,34 @@ static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) */ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #64\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" + "mov r12, #0\n\t" + "add lr, %[a], #0x40\n\t" + "\n" + "L_sp_521_sub_in_pkace_17_word_%=: \n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - "subs %[c], r14, %[c]\n\t" - 
"ldm %[a], {r4}\n\t" - "ldm %[b]!, {r8}\n\t" - "sbcs r4, r4, r8\n\t" - "stm %[a]!, {r4}\n\t" - "sbc %[c], r14, r14\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" + "cmp %[a], lr\n\t" + "bne L_sp_521_sub_in_pkace_17_word_%=\n\t" + "subs r12, r10, r12\n\t" + "ldm %[a], {r2}\n\t" + "ldm %[b]!, {r6}\n\t" + "sbcs r2, r2, r6\n\t" + "stm %[a]!, {r2}\n\t" + "sbc %[a], r10, r10\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10" ); - - return c; + return (uint32_t)(size_t)a; } #else @@ -57608,215 +118924,825 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) */ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + 
"sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4}\n\t" - "ldm %[b]!, {r8}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" - "stm %[a]!, {r4}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2}\n\t" + "ldm %[b]!, {r6}\n\t" + "sbcs r2, r2, r6\n\t" + "stm %[a]!, {r2}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_521_mul_d_17_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" "add 
r9, r9, #4\n\t" - "cmp r9, #68\n\t" - "blt 1b\n\t" + "cmp r9, #0x44\n\t" + "blt L_sp_521_mul_d_17_word_%=\n\t" "str r3, [%[r], #68]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + 
"adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[12] * B\n\t" + /* A[12] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[13] * B\n\t" + /* A[13] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[14] * B\n\t" + /* A[14] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[15] * B\n\t" + /* A[15] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" 
+#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[16] * B\n\t" + /* A[16] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" +#endif "str r4, [%[r]], #4\n\t" "str r5, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. 
@@ -57828,57 +119754,184 @@ static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, */ static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], 
r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. + */ +static sp_digit div_521_word_17(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_521_word_17_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_521_word_17_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, 
#16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* AND m into each word of a and store in r. * * r A single precision integer. 
@@ -58528,960 +120581,1285 @@ int sp_ecc_sign_521(const byte* hash, word32 hashLen, WC_RNG* rng, * a Number to divide. * m Modulus. */ -static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, - const sp_digit* m) +static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "ldr r4, [%[a]]\n\t" - "ands r8, r4, #1\n\t" - "beq 1f\n\t" - "mov r12, #0\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "ldr r8, [%[m], #0]\n\t" - "ldr r9, [%[m], #4]\n\t" - "ldr r10, [%[m], #8]\n\t" - "ldr r14, [%[m], #12]\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" - "str r6, [%[r], #8]\n\t" - "str r7, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "ldr r8, [%[m], #16]\n\t" - "ldr r9, [%[m], #20]\n\t" - "ldr r10, [%[m], #24]\n\t" - "ldr r14, [%[m], #28]\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" - "str r6, [%[r], #24]\n\t" - "str r7, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[a], #40]\n\t" - "ldr r7, [%[a], #44]\n\t" - "ldr r8, [%[m], #32]\n\t" - "ldr r9, [%[m], #36]\n\t" - "ldr r10, [%[m], #40]\n\t" - "ldr r14, [%[m], #44]\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" - "str r6, [%[r], #40]\n\t" - "str r7, [%[r], #44]\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[a], #56]\n\t" - "ldr r7, [%[a], #60]\n\t" - "ldr r8, [%[m], #48]\n\t" - "ldr r9, [%[m], #52]\n\t" - "ldr r10, [%[m], #56]\n\t" - "ldr r14, [%[m], #60]\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, 
r14\n\t" - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" - "str r6, [%[r], #56]\n\t" - "str r7, [%[r], #60]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r8, [%[m], #64]\n\t" - "adcs r4, r4, r8\n\t" - "str r4, [%[r], #64]\n\t" - "adc r8, r12, r12\n\t" - "b 2f\n\t" - "\n1:\n\t" - "ldr r5, [%[a], #2]\n\t" - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #2]\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[a], #6]\n\t" - "str r4, [%[r], #4]\n\t" - "str r5, [%[r], #6]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #10]\n\t" - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #10]\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[a], #14]\n\t" - "str r4, [%[r], #12]\n\t" - "str r5, [%[r], #14]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #18]\n\t" - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #18]\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[a], #22]\n\t" - "str r4, [%[r], #20]\n\t" - "str r5, [%[r], #22]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #26]\n\t" - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #26]\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[a], #30]\n\t" - "str r4, [%[r], #28]\n\t" - "str r5, [%[r], #30]\n\t" - "\n2:\n\t" - "ldr r3, [%[r]]\n\t" - "ldr r4, [%[r], #4]\n\t" - "lsr r3, r3, #1\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #8]\n\t" - "str r3, [%[r], #0]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #12]\n\t" - "str r4, [%[r], #4]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #16]\n\t" - "str r5, [%[r], #8]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #20]\n\t" - "str r3, [%[r], #12]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #24]\n\t" - "str r4, [%[r], #16]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #28]\n\t" - "str r5, [%[r], #20]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #32]\n\t" - "str r3, [%[r], #24]\n\t" - 
"orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #36]\n\t" - "str r4, [%[r], #28]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #40]\n\t" - "str r5, [%[r], #32]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #44]\n\t" - "str r3, [%[r], #36]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #48]\n\t" - "str r4, [%[r], #40]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #52]\n\t" - "str r5, [%[r], #44]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "ldr r5, [%[a], #56]\n\t" - "str r3, [%[r], #48]\n\t" - "orr r4, r4, r5, lsl #31\n\t" - "lsr r5, r5, #1\n\t" - "ldr r3, [%[a], #60]\n\t" - "str r4, [%[r], #52]\n\t" - "orr r5, r5, r3, lsl #31\n\t" - "lsr r3, r3, #1\n\t" - "ldr r4, [%[a], #64]\n\t" - "str r5, [%[r], #56]\n\t" - "orr r3, r3, r4, lsl #31\n\t" - "lsr r4, r4, #1\n\t" - "orr r4, r4, r8, lsl #31\n\t" - "str r3, [%[r], #60]\n\t" - "str r4, [%[r], #64]\n\t" + "ldr r4, [%[a]], #4\n\t" + "ands r3, r4, #1\n\t" + "beq L_sp_521_div2_mod_17_even_%=\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4}\n\t" + "ldm 
%[m]!, {r8}\n\t" + "adcs r4, r4, r8\n\t" + "stm %[r]!, {r4}\n\t" + "adc r3, r12, r12\n\t" + "b L_sp_521_div2_mod_17_div2_%=\n\t" + "\n" + "L_sp_521_div2_mod_17_even_%=: \n\t" + "ldm %[a]!, {r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4}\n\t" + "stm %[r]!, {r4}\n\t" + "\n" + "L_sp_521_div2_mod_17_div2_%=: \n\t" + "sub %[r], #0x44\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r8, [%[r]]\n\t" + "ldr r9, [%[r], #4]\n\t" +#else + "ldrd r8, r9, [%[r]]\n\t" +#endif + "lsr r8, r8, #1\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #8]\n\t" + "str r8, [%[r]]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #12]\n\t" + "str r9, [%[r], #4]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #16]\n\t" + "str r10, [%[r], #8]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #20]\n\t" + "str r8, [%[r], #12]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #24]\n\t" + "str r9, [%[r], #16]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #28]\n\t" + "str r10, [%[r], #20]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #32]\n\t" + "str r8, [%[r], #24]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #36]\n\t" + "str r9, [%[r], #28]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #40]\n\t" + "str r10, [%[r], #32]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #44]\n\t" + "str r8, [%[r], #36]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #48]\n\t" + "str r9, [%[r], #40]\n\t" + 
"orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #52]\n\t" + "str r10, [%[r], #44]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "ldr r10, [%[r], #56]\n\t" + "str r8, [%[r], #48]\n\t" + "orr r9, r9, r10, lsl #31\n\t" + "lsr r10, r10, #1\n\t" + "ldr r8, [%[r], #60]\n\t" + "str r9, [%[r], #52]\n\t" + "orr r10, r10, r8, lsl #31\n\t" + "lsr r8, r8, #1\n\t" + "ldr r9, [%[r], #64]\n\t" + "str r10, [%[r], #56]\n\t" + "orr r8, r8, r9, lsl #31\n\t" + "lsr r9, r9, #1\n\t" + "orr r9, r9, r3, lsl #31\n\t" + "str r8, [%[r], #60]\n\t" + "str r9, [%[r], #64]\n\t" + : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); } #if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) -static int sp_521_num_bits_17(sp_digit* a) +static const unsigned char L_sp_521_num_bits_17_table[] = { + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 
0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +}; + +static int sp_521_num_bits_17(const sp_digit* a) { - static const byte sp_num_bits_table[256] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - }; - const byte* table = sp_num_bits_table; - int r = 0; - __asm__ __volatile__ ( - "ldr r2, [%[a], #64]\n\t" - "cmp r2, #0\n\t" - "beq 16f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 316f\n\t" - "mov r3, #536\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n316:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 216f\n\t" - "mov r3, #528\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - 
"b 18f\n\t" - "\n216:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 116f\n\t" - "mov r3, #520\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n116:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #512\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n16:\n\t" - "ldr r2, [%[a], #60]\n\t" - "cmp r2, #0\n\t" - "beq 15f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 315f\n\t" - "mov r3, #504\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n315:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 215f\n\t" - "mov r3, #496\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n215:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 115f\n\t" - "mov r3, #488\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n115:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #480\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n15:\n\t" - "ldr r2, [%[a], #56]\n\t" - "cmp r2, #0\n\t" - "beq 14f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 314f\n\t" - "mov r3, #472\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n314:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 214f\n\t" - "mov r3, #464\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n214:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 114f\n\t" - "mov r3, #456\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n114:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #448\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n14:\n\t" - "ldr r2, [%[a], #52]\n\t" - "cmp r2, #0\n\t" - "beq 13f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 
313f\n\t" - "mov r3, #440\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n313:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 213f\n\t" - "mov r3, #432\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n213:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 113f\n\t" - "mov r3, #424\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n113:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #416\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n13:\n\t" - "ldr r2, [%[a], #48]\n\t" - "cmp r2, #0\n\t" - "beq 12f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 312f\n\t" - "mov r3, #408\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n312:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 212f\n\t" - "mov r3, #400\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n212:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 112f\n\t" - "mov r3, #392\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n112:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #384\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n12:\n\t" - "ldr r2, [%[a], #44]\n\t" - "cmp r2, #0\n\t" - "beq 11f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 311f\n\t" - "mov r3, #376\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n311:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 211f\n\t" - "mov r3, #368\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n211:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 111f\n\t" - "mov r3, #360\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], 
r3, %[r]\n\t" - "b 18f\n\t" - "\n111:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #352\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n11:\n\t" - "ldr r2, [%[a], #40]\n\t" - "cmp r2, #0\n\t" - "beq 10f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 310f\n\t" - "mov r3, #344\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n310:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 210f\n\t" - "mov r3, #336\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n210:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 110f\n\t" - "mov r3, #328\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n110:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #320\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n10:\n\t" - "ldr r2, [%[a], #36]\n\t" - "cmp r2, #0\n\t" - "beq 9f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 309f\n\t" - "mov r3, #312\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n309:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 209f\n\t" - "mov r3, #304\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n209:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 109f\n\t" - "mov r3, #296\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n109:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #288\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n9:\n\t" - "ldr r2, [%[a], #32]\n\t" - "cmp r2, #0\n\t" - "beq 8f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 308f\n\t" - "mov r3, #280\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n308:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, 
#0\n\t" - "beq 208f\n\t" - "mov r3, #272\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n208:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 108f\n\t" - "mov r3, #264\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n108:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #256\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n8:\n\t" - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 307f\n\t" - "mov r3, #248\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n307:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 207f\n\t" - "mov r3, #240\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n207:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 107f\n\t" - "mov r3, #232\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n107:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #224\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 306f\n\t" - "mov r3, #216\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n306:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 206f\n\t" - "mov r3, #208\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n206:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 106f\n\t" - "mov r3, #200\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n106:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #192\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n6:\n\t" - "ldr r2, 
[%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 305f\n\t" - "mov r3, #184\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n305:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 205f\n\t" - "mov r3, #176\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n205:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 105f\n\t" - "mov r3, #168\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n105:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #160\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 304f\n\t" - "mov r3, #152\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n304:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 204f\n\t" - "mov r3, #144\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n204:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 104f\n\t" - "mov r3, #136\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n104:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #128\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 303f\n\t" - "mov r3, #120\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n303:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 203f\n\t" - "mov r3, #112\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n203:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp 
r4, #0\n\t" - "beq 103f\n\t" - "mov r3, #104\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n103:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #96\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 302f\n\t" - "mov r3, #88\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n302:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 202f\n\t" - "mov r3, #80\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n202:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 102f\n\t" - "mov r3, #72\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n102:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #64\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 301f\n\t" - "mov r3, #56\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n301:\n\t" - "lsr r4, r2, #16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 201f\n\t" - "mov r3, #48\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n201:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 101f\n\t" - "mov r3, #40\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n101:\n\t" - "and r4, r2, #0xff\n\t" - "mov r3, #32\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "lsr r4, r2, #24\n\t" - "cmp r4, #0\n\t" - "beq 300f\n\t" - "mov r3, #24\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n300:\n\t" - "lsr r4, r2, 
#16\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 200f\n\t" - "mov r3, #16\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n200:\n\t" - "lsr r4, r2, #8\n\t" - "and r4, r4, #0xff\n\t" - "cmp r4, #0\n\t" - "beq 100f\n\t" - "mov r3, #8\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "add %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n100:\n\t" - "and r4, r2, #0xff\n\t" - "ldrb %[r], [%[table], r4]\n\t" - "\n18:\n\t" - : [r] "+r" (r) - : [a] "r" (a), [table] "r" (table) - : "r2", "r3", "r4" - ); - - return r; -} + "mov lr, %[L_sp_521_num_bits_17_table]\n\t" + "ldr r1, [%[a], #64]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_16_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_16_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x18\n\t" #else -static int sp_521_num_bits_17(sp_digit* a) -{ - int r = 0; - - __asm__ __volatile__ ( - "ldr r2, [%[a], #64]\n\t" - "cmp r2, #0\n\t" - "beq 16f\n\t" - "mov r3, #544\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n16:\n\t" - "ldr r2, [%[a], #60]\n\t" - "cmp r2, #0\n\t" - "beq 15f\n\t" - "mov r3, #512\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n15:\n\t" - "ldr r2, [%[a], #56]\n\t" - "cmp r2, #0\n\t" - "beq 14f\n\t" - "mov r3, #480\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n14:\n\t" - "ldr r2, [%[a], #52]\n\t" - "cmp r2, #0\n\t" - "beq 13f\n\t" - "mov r3, #448\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n13:\n\t" - "ldr r2, [%[a], #48]\n\t" - "cmp r2, #0\n\t" - "beq 12f\n\t" - "mov r3, #416\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n12:\n\t" - "ldr r2, [%[a], #44]\n\t" - "cmp r2, #0\n\t" - "beq 11f\n\t" - "mov r3, #384\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n11:\n\t" - "ldr r2, [%[a], #40]\n\t" - "cmp r2, #0\n\t" - 
"beq 10f\n\t" - "mov r3, #352\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n10:\n\t" - "ldr r2, [%[a], #36]\n\t" - "cmp r2, #0\n\t" - "beq 9f\n\t" - "mov r3, #320\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n9:\n\t" - "ldr r2, [%[a], #32]\n\t" - "cmp r2, #0\n\t" - "beq 8f\n\t" - "mov r3, #288\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n8:\n\t" - "ldr r2, [%[a], #28]\n\t" - "cmp r2, #0\n\t" - "beq 7f\n\t" - "mov r3, #256\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n7:\n\t" - "ldr r2, [%[a], #24]\n\t" - "cmp r2, #0\n\t" - "beq 6f\n\t" - "mov r3, #224\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n6:\n\t" - "ldr r2, [%[a], #20]\n\t" - "cmp r2, #0\n\t" - "beq 5f\n\t" - "mov r3, #192\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n5:\n\t" - "ldr r2, [%[a], #16]\n\t" - "cmp r2, #0\n\t" - "beq 4f\n\t" - "mov r3, #160\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n4:\n\t" - "ldr r2, [%[a], #12]\n\t" - "cmp r2, #0\n\t" - "beq 3f\n\t" - "mov r3, #128\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n3:\n\t" - "ldr r2, [%[a], #8]\n\t" - "cmp r2, #0\n\t" - "beq 2f\n\t" - "mov r3, #96\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n2:\n\t" - "ldr r2, [%[a], #4]\n\t" - "cmp r2, #0\n\t" - "beq 1f\n\t" - "mov r3, #64\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "b 18f\n\t" - "\n1:\n\t" - "ldr r2, [%[a], #0]\n\t" - "mov r3, #32\n\t" - "clz %[r], r2\n\t" - "sub %[r], r3, %[r]\n\t" - "\n18:\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "r2", "r3" - ); - - return r; -} + "mov r2, #0x218\n\t" #endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_16_2_%=\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x10\n\t" +#else + "mov r2, #0x210\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_16_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x8\n\t" +#else + "mov r2, #0x208\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x200\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_%=: \n\t" + "ldr r1, [%[a], #60]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_15_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_15_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xf8\n\t" +#else + "mov r2, #0x1f8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_15_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xf0\n\t" +#else + "mov r2, #0x1f0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, 
#0\n\t" + "beq L_sp_521_num_bits_17_15_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xe8\n\t" +#else + "mov r2, #0x1e8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xe0\n\t" +#else + "mov r2, #0x1e0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_%=: \n\t" + "ldr r1, [%[a], #56]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_14_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_14_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xd8\n\t" +#else + "mov r2, #0x1d8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_14_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xd0\n\t" +#else + "mov r2, #0x1d0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_14_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xc8\n\t" +#else + "mov r2, #0x1c8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_1_%=: \n\t" + "and 
r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xc0\n\t" +#else + "mov r2, #0x1c0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_%=: \n\t" + "ldr r1, [%[a], #52]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_13_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_13_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xb8\n\t" +#else + "mov r2, #0x1b8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_13_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xb0\n\t" +#else + "mov r2, #0x1b0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_13_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xa8\n\t" +#else + "mov r2, #0x1a8\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xa0\n\t" +#else + "mov r2, #0x1a0\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_%=: \n\t" + "ldr r1, [%[a], #48]\n\t" + "cmp r1, 
#0\n\t" + "beq L_sp_521_num_bits_17_12_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_12_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x98\n\t" +#else + "mov r2, #0x198\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_12_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x90\n\t" +#else + "mov r2, #0x190\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_12_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x88\n\t" +#else + "mov r2, #0x188\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x80\n\t" +#else + "mov r2, #0x180\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_%=: \n\t" + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_11_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_11_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x78\n\t" +#else + "mov r2, #0x178\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b 
L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_11_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x70\n\t" +#else + "mov r2, #0x170\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_11_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x68\n\t" +#else + "mov r2, #0x168\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_%=: \n\t" + "ldr r1, [%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_10_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_10_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x58\n\t" +#else + "mov r2, #0x158\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_10_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x50\n\t" +#else + "mov r2, #0x150\n\t" +#endif + 
"ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_10_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x48\n\t" +#else + "mov r2, #0x148\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_9_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_9_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x38\n\t" +#else + "mov r2, #0x138\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_9_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x30\n\t" +#else + "mov r2, #0x130\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_9_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, 
#0x28\n\t" +#else + "mov r2, #0x128\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_8_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_8_3_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x18\n\t" +#else + "mov r2, #0x118\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_8_2_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x10\n\t" +#else + "mov r2, #0x110\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_8_1_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x8\n\t" +#else + "mov r2, #0x108\n\t" +#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_1_%=: \n\t" + "and r3, r1, #0xff\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" 
+#endif + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_%=: \n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_7_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_7_3_%=\n\t" + "mov r2, #0xf8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_7_2_%=\n\t" + "mov r2, #0xf0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_7_1_%=\n\t" + "mov r2, #0xe8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xe0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_6_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_6_3_%=\n\t" + "mov r2, #0xd8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_6_2_%=\n\t" + "mov r2, #0xd0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_6_1_%=\n\t" + "mov r2, #0xc8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + 
"L_sp_521_num_bits_17_6_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xc0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_5_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_5_3_%=\n\t" + "mov r2, #0xb8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_5_2_%=\n\t" + "mov r2, #0xb0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_5_1_%=\n\t" + "mov r2, #0xa8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0xa0\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_4_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_4_3_%=\n\t" + "mov r2, #0x98\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_4_2_%=\n\t" + "mov r2, #0x90\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_4_1_%=\n\t" + "mov r2, #0x88\n\t" + "ldrb r12, [lr, r3]\n\t" + 
"add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x80\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_3_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_3_3_%=\n\t" + "mov r2, #0x78\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_3_2_%=\n\t" + "mov r2, #0x70\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_3_1_%=\n\t" + "mov r2, #0x68\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x60\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_2_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_2_3_%=\n\t" + "mov r2, #0x58\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_2_2_%=\n\t" + "mov r2, #0x50\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq 
L_sp_521_num_bits_17_2_1_%=\n\t" + "mov r2, #0x48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #0x40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_1_%=\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_1_3_%=\n\t" + "mov r2, #56\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_1_2_%=\n\t" + "mov r2, #48\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_1_1_%=\n\t" + "mov r2, #40\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "mov r2, #32\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "lsr r3, r1, #24\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_0_3_%=\n\t" + "mov r2, #24\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_0_3_%=: \n\t" + "lsr r3, r1, #16\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq L_sp_521_num_bits_17_0_2_%=\n\t" + "mov r2, #16\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_0_2_%=: \n\t" + "lsr r3, r1, #8\n\t" + "and r3, r3, #0xff\n\t" + "cmp r3, #0\n\t" + "beq 
L_sp_521_num_bits_17_0_1_%=\n\t" + "mov r2, #8\n\t" + "ldrb r12, [lr, r3]\n\t" + "add r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_0_1_%=: \n\t" + "and r3, r1, #0xff\n\t" + "ldrb r12, [lr, r3]\n\t" + "\n" + "L_sp_521_num_bits_17_18_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : [L_sp_521_num_bits_17_table] "r" (L_sp_521_num_bits_17_table) + : "memory", "r1", "r2", "r3", "r12", "lr" + ); + return (uint32_t)(size_t)a; +} +#else +static int sp_521_num_bits_17(const sp_digit* a) +{ + __asm__ __volatile__ ( + "ldr r1, [%[a], #64]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_16_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x220\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_16_%=: \n\t" + "ldr r1, [%[a], #60]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_15_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x2\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x200\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_15_%=: \n\t" + "ldr r1, [%[a], #56]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_14_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xe0\n\t" +#else + "mov r2, #0x1e0\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_14_%=: \n\t" + "ldr r1, [%[a], #52]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_13_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xc0\n\t" +#else + "mov r2, #0x1c0\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, 
r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_13_%=: \n\t" + "ldr r1, [%[a], #48]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_12_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0xa0\n\t" +#else + "mov r2, #0x1a0\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_12_%=: \n\t" + "ldr r1, [%[a], #44]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_11_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x80\n\t" +#else + "mov r2, #0x180\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_11_%=: \n\t" + "ldr r1, [%[a], #40]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_10_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x60\n\t" +#else + "mov r2, #0x160\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_10_%=: \n\t" + "ldr r1, [%[a], #36]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_9_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x40\n\t" +#else + "mov r2, #0x140\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_9_%=: \n\t" + "ldr r1, [%[a], #32]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_8_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x20\n\t" +#else + "mov r2, #0x120\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_8_%=: 
\n\t" + "ldr r1, [%[a], #28]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_7_%=\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "mov r2, #0x1\n\t" + "lsl r2, r2, #8\n\t" + "add r2, r2, #0x0\n\t" +#else + "mov r2, #0x100\n\t" +#endif + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_7_%=: \n\t" + "ldr r1, [%[a], #24]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_6_%=\n\t" + "mov r2, #0xe0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_6_%=: \n\t" + "ldr r1, [%[a], #20]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_5_%=\n\t" + "mov r2, #0xc0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_5_%=: \n\t" + "ldr r1, [%[a], #16]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_4_%=\n\t" + "mov r2, #0xa0\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_4_%=: \n\t" + "ldr r1, [%[a], #12]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_3_%=\n\t" + "mov r2, #0x80\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_3_%=: \n\t" + "ldr r1, [%[a], #8]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_2_%=\n\t" + "mov r2, #0x60\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_2_%=: \n\t" + "ldr r1, [%[a], #4]\n\t" + "cmp r1, #0\n\t" + "beq L_sp_521_num_bits_17_1_%=\n\t" + "mov r2, #0x40\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "b L_sp_521_num_bits_17_18_%=\n\t" + "\n" + "L_sp_521_num_bits_17_1_%=: \n\t" + "ldr r1, [%[a]]\n\t" + "mov r2, #32\n\t" + "clz r12, r1\n\t" + "sub r12, r2, r12\n\t" + "\n" + "L_sp_521_num_bits_17_18_%=: \n\t" + "mov %[a], r12\n\t" + : [a] "+r" (a) + : + : "memory", "r1", "r2", "r3", "r12", "lr" + ); 
+ return (uint32_t)(size_t)a; +} + +#endif /* WOLFSSL_SP_ARM_ARCH && (WOLFSSL_SP_ARM_ARCH < 7) */ /* Non-constant time modular inversion. * * @param [out] r Resulting number. @@ -60503,1754 +122881,9738 @@ typedef struct sp_point_1024 { static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" + "sub sp, sp, #0x40\n\t" "mov r10, #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r11, [%[a], #0]\n\t" - "ldr r12, [%[b], #0]\n\t" + /* A[0] * B[0] */ + "ldr r11, [%[a]]\n\t" + "ldr r12, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r3, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "mov r5, #0\n\t" +#else "umull r3, r4, r11, r12\n\t" "mov r5, #0\n\t" +#endif "str r3, [sp]\n\t" - "# A[0] * B[1]\n\t" + /* A[0] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, 
r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[0]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[0] */ "ldr r8, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #4]\n\t" - "# A[2] * B[0]\n\t" + /* A[2] * B[0] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" 
"adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[1] */ "ldr r11, [%[a], #4]\n\t" "ldr r12, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[2]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[2] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, 
r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #8]\n\t" - "# A[0] * B[3]\n\t" + /* A[0] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[2]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[2] */ "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[1]\n\t" + "adc r5, r5, #0\n\t" 
+#endif + /* A[2] * B[1] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[0] */ "ldr r8, [%[a], #12]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #12]\n\t" - "# A[4] * B[0]\n\t" + /* A[4] * B[0] */ "ldr r8, [%[a], #16]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[3] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[1] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[2] */ "ldr r11, [%[a], #8]\n\t" "ldr r12, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, 
#16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[3] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[4]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[4] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #16]\n\t" - "# A[0] * B[5]\n\t" + /* A[0] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[4]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[4] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds 
r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[2] */ "ldr r8, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[1] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[0] */ "ldr r8, [%[a], #20]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #20]\n\t" - "# A[6] * B[0]\n\t" + /* A[6] * B[0] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[5] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[1] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" 
+ "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[2] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[3] */ "ldr r11, [%[a], #12]\n\t" "ldr r12, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, 
r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[4] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[5] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[6]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[6] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #24]\n\t" - "# A[0] * B[7]\n\t" + /* A[0] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs 
r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[6]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[6] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[5] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" 
"adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[4] */ "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[3] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * 
B[2] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[1] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[0] */ "ldr r8, [%[a], #28]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #28]\n\t" - "# A[8] * B[0]\n\t" + /* A[8] * B[0] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[7] * B[1]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[1] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 
4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[2] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[3] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, 
r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[4] */ "ldr r11, [%[a], #16]\n\t" "ldr r12, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[5] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[6] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[7] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[8]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[8] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #32]\n\t" - "# A[0] * B[9]\n\t" + /* A[0] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[8]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[8] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[7] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[6] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, 
r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[4] */ "ldr r8, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[3] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + 
"adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[2] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[1] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, 
r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[0] */ "ldr r8, [%[a], #36]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #36]\n\t" - "# A[10] * B[0]\n\t" + /* A[10] * B[0] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, 
r10\n\t" - "# A[9] * B[1]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[1] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[2] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * 
B[3] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[4] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[5] */ "ldr r11, [%[a], #20]\n\t" "ldr r12, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[6] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[7] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[8] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[9] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + 
"adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[0] * B[10]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[0] * B[10] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #40]\n\t" - "# A[0] * B[11]\n\t" + /* A[0] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[1] * B[10]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[10] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[9] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[8] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[7] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[6] */ "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[5] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, 
r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[4] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[3] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + 
"lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[2] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[1]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[1] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[0] */ "ldr r8, [%[a], #44]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #44]\n\t" - "# A[12] * B[0]\n\t" + /* A[12] * B[0] */ "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" 
"adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[11] * B[1]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[1] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[2] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - 
"# A[9] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[3] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[4] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[5] */ "ldr r8, [%[a], 
#28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[6] */ "ldr r11, [%[a], #24]\n\t" "ldr r12, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[7] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[8] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[9] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[10] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[1] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[11] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc 
r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[0] * B[12]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[0] * B[12] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #48]\n\t" - "# A[0] * B[13]\n\t" + /* A[0] * B[13] */ "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[1] * B[12]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[12] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[11] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[10] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[9] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, 
#16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[8] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, 
r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[6] */ "ldr r8, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[5] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[4] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[3] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, 
r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[2]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[2] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[1]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[1] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# 
A[13] * B[0]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[0] */ "ldr r8, [%[a], #52]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #52]\n\t" - "# A[14] * B[0]\n\t" + /* A[14] * B[0] */ "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[13] * B[1]\n\t" + "mov r4, #0\n\t" + "adc 
r4, r4, #0\n\t" +#endif + /* A[13] * B[1] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[2] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[3] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], 
#12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[4] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[5] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, 
#16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[6] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[7] */ "ldr r11, [%[a], #28]\n\t" "ldr r12, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[8] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[9] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, 
#16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[10] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[3] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[11] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[2] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[2] * B[12] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[1] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * B[13] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[0] * B[14]\n\t" - "ldr r8, [%[a], #0]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[0] * B[14] */ + "ldr r8, [%[a]]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [sp, #56]\n\t" - "# A[0] * B[15]\n\t" + /* A[0] * B[15] */ "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + 
"adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[1] * B[14]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[1] * B[14] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[2] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[2] * B[13] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[3] * B[12] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[11] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[10] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[9] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else 
"umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[8] */ "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[7] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[6]\n\t" + "adc r5, r5, 
#0\n\t" +#endif + /* A[9] * B[6] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[5] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[4]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[4] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #16]\n\t" 
+#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[3]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[3] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[2]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[2] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + 
"lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[1]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[1] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[0]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[0] */ "ldr r8, [%[a], #60]\n\t" - "ldr r9, [%[b], #0]\n\t" + "ldr r9, [%[b]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [sp, #60]\n\t" - "# A[15] * B[1]\n\t" + /* A[15] * B[1] */ "ldr r9, [%[b], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[14] * B[2]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[2] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, 
r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[3]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[3] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[4] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[5] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[6] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" 
+ "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[7] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[8] */ "ldr r11, [%[a], #32]\n\t" "ldr r12, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, 
r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[9] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[10] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[5] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[11] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[4] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[4] * B[12] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[3] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[3] * B[13] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[2] * B[14]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[2] * B[14] */ "ldr r8, [%[a], #8]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else 
"umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[1] * B[15]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * B[15] */ "ldr r8, [%[a], #4]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #64]\n\t" - "# A[2] * B[15]\n\t" + /* A[2] * B[15] */ "ldr r8, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" 
- "adc r4, r10, r10\n\t" - "# A[3] * B[14]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[3] * B[14] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[4] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[4] * B[13] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[5] * B[12]\n\t" + "adc r4, r4, 
#0\n\t" +#endif + /* A[5] * B[12] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[11] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[10] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], 
#40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[8] */ "ldr r8, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, 
#16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[7] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[6]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[6] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + 
"adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[5]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[5] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[4]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[4] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[3]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[3] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[15] * B[2]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[2] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #68]\n\t" - "# A[15] * B[3]\n\t" + /* A[15] * B[3] */ "ldr r9, [%[b], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[14] * B[4]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[4] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr 
r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[5]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[5] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[6] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" 
+ "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[7] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[8] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[9] */ "ldr r11, [%[a], #36]\n\t" "ldr r12, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[10] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, 
r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[7] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[7] * B[11] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[6] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[6] * B[12] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" 
"adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[5] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[5] * B[13] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[4] * B[14]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[4] * B[14] */ "ldr r8, [%[a], #16]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[3] * B[15]\n\t" + "adc r5, r5, 
#0\n\t" +#endif + /* A[3] * B[15] */ "ldr r8, [%[a], #12]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #72]\n\t" - "# A[4] * B[15]\n\t" + /* A[4] * B[15] */ "ldr r8, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[5] * B[14]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[5] * B[14] */ "ldr r8, 
[%[a], #20]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[6] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[6] * B[13] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[12] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[11] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[10] */ "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[9] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[8]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[8] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + 
"lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[7]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[7] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[6]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[6] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, 
#16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[5]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[5] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[15] * B[4]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[4] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr 
r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #76]\n\t" - "# A[15] * B[5]\n\t" + /* A[15] * B[5] */ "ldr r9, [%[b], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[14] * B[6]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[6] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul 
r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[7]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[7] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[8] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" 
+ "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[9] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[10]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[10] */ "ldr r11, [%[a], #40]\n\t" "ldr r12, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[9] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[11] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[8] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[8] * B[12] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull 
r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[7] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[7] * B[13] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[6] * B[14]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[6] * B[14] */ "ldr r8, [%[a], #24]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" 
- "# A[5] * B[15]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[5] * B[15] */ "ldr r8, [%[a], #20]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #80]\n\t" - "# A[6] * B[15]\n\t" + /* A[6] * B[15] */ "ldr r8, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[7] * B[14]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" 
+#endif + /* A[7] * B[14] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[8] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[8] * B[13] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[12] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #48]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[10]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[10] */ "ldr r8, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[9]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[9] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[8]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[8] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, 
r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[7]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[7] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[6]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[6] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + 
"lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #84]\n\t" - "# A[15] * B[7]\n\t" + /* A[15] * B[7] */ "ldr r9, [%[b], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[14] * B[8]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[8] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[9]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[9] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[10] */ "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, 
#16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[11] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[11] */ "ldr r11, [%[a], #44]\n\t" "ldr r12, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[10] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[10] * B[12] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, 
#0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[9] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[9] * B[13] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[8] * B[14]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[8] * B[14] */ "ldr r8, [%[a], #32]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr 
r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[7] * B[15]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[7] * B[15] */ "ldr r8, [%[a], #28]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #88]\n\t" - "# A[8] * B[15]\n\t" + /* A[8] * B[15] */ "ldr r8, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[9] * B[14]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[9] * B[14] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[10] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[10] * B[13] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, 
r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[12]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[12] */ "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[11]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[11] */ "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[10]\n\t" + "adc r4, r4, 
#0\n\t" +#endif + /* A[13] * B[10] */ "ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[14] * B[9]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[9] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[15] * B[8]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[8] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], 
#32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #92]\n\t" - "# A[15] * B[9]\n\t" + /* A[15] * B[9] */ "ldr r9, [%[b], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[14] * B[10]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[10] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[13] * B[11]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[11] */ "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[12] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[12] * B[12] */ "ldr r11, [%[a], #48]\n\t" "ldr r12, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + 
"lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[11] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[11] * B[13] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[10] * B[14]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[10] * B[14] */ "ldr r8, [%[a], #40]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs 
r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[9] * B[15]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[9] * B[15] */ "ldr r8, [%[a], #36]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], #96]\n\t" - "# A[10] * B[15]\n\t" + /* A[10] * B[15] */ "ldr r8, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + 
"mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[11] * B[14]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[11] * B[14] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[12] * B[13]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * B[13] */ "ldr r9, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[12]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[12] */ "ldr r8, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[14] * B[11]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[11] */ "ldr r8, [%[a], #56]\n\t" "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" 
+ "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[15] * B[10]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[15] * B[10] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #100]\n\t" - "# A[15] * B[11]\n\t" + /* A[15] * B[11] */ "ldr r9, [%[b], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, 
#0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[14] * B[12]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[14] * B[12] */ "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[13] * B[13]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * B[13] */ "ldr r11, [%[a], #52]\n\t" "ldr r12, [%[b], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, 
r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[12] * B[14]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[12] * B[14] */ "ldr r8, [%[a], #48]\n\t" "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" - "# A[11] * B[15]\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[11] * B[15] */ "ldr r8, [%[a], #44]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + 
"adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #104]\n\t" - "# A[12] * B[15]\n\t" + /* A[12] * B[15] */ "ldr r8, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" - "# A[13] * B[14]\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[13] * B[14] */ "ldr r9, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, 
r11, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[14] * B[13]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[14] * B[13] */ "ldr r8, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" - "# A[15] * B[12]\n\t" + "adc r5, r5, #0\n\t" +#endif + /* A[15] * B[12] */ "ldr r8, [%[a], #60]\n\t" "ldr r9, [%[b], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" 
+#endif "str r3, [%[r], #108]\n\t" - "# A[15] * B[13]\n\t" + /* A[15] * B[13] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" - "# A[14] * B[14]\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[14] * B[14] */ "ldr r11, [%[a], #56]\n\t" "ldr r12, [%[b], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r11, r12\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" - "# A[13] * B[15]\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * B[15] */ 
"ldr r8, [%[a], #52]\n\t" "ldr r9, [%[b], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r3, r10\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #112]\n\t" - "# A[14] * B[15]\n\t" + /* A[14] * B[15] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r11, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r11, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r11, r9\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" - "# A[15] * B[14]\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[15] * B[14] */ "ldr r8, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r12, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r12, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r12, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r12, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, r8, r12\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r4, r10\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r], #116]\n\t" - "# A[15] * B[15]\n\t" + /* A[15] * B[15] */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r9, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r9, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" + "lsr r6, r8, #16\n\t" + "lsr r7, r9, #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r9, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r6, r7, r8, r9\n\t" "adds r3, r3, r6\n\t" "adc r4, r4, r7\n\t" +#endif "str r3, [%[r], #120]\n\t" "str r4, [%[r], #124]\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" @@ -62261,9 +132623,8 @@ static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) "stm %[r]!, {r3, r4, r5, r6}\n\t" "ldm sp!, {r3, r4, r5, r6}\n\t" "stm %[r]!, {r3, r4, r5, r6}\n\t" - "sub %[r], %[r], #64\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), 
[a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -62276,109 +132637,515 @@ static void sp_1024_mul_16(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #64\n\t" - "mov r12, #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r10, [%[a], #0]\n\t" + "sub sp, sp, #0x40\n\t" + /* A[0] * A[0] */ + "ldr r10, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r9, r10, #16\n\t" + "lsl r2, r10, #16\n\t" + "lsr r2, r2, #16\n\t" + "mul r8, r2, r2\n\t" + "mul r3, r9, r9\n\t" + "mul r2, r9, r2\n\t" + "lsr r9, r2, #15\n\t" + "lsl r2, r2, #17\n\t" + "adds r8, r8, r2\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r3, r10, r10\n\t" +#endif "mov r4, #0\n\t" "str r8, [sp]\n\t" - "# A[0] * A[1]\n\t" + /* A[0] * A[1] */ "ldr r10, [%[a], #4]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + 
"adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #4]\n\t" - "# A[0] * A[2]\n\t" + /* A[0] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[1] * A[1]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[1] * A[1] */ "ldr r10, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul 
r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [sp, #8]\n\t" - "# A[0] * A[3]\n\t" + /* A[0] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[1] * A[2]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[1] * A[2] */ "ldr r10, [%[a], #8]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, 
r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [sp, #12]\n\t" - "# A[0] * A[4]\n\t" + /* A[0] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" 
"adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[1] * A[3]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[1] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[2] * A[2]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + 
"adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[2] * A[2] */ "ldr r10, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [sp, #16]\n\t" - "# A[0] * A[5]\n\t" + /* A[0] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[4]\n\t" + /* A[1] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + 
"adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[3] */ "ldr r10, [%[a], #12]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -62386,66 +133153,294 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #20]\n\t" - "# A[0] * A[6]\n\t" + /* A[0] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #0]\n\t" 
- "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[5]\n\t" + /* A[1] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr 
r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[3]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[3] */ "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #24]\n\t" - "# A[0] * A[7]\n\t" + /* A[0] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, 
#16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[6]\n\t" + /* A[1] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" 
+ "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[4] */ "ldr r10, [%[a], #16]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -62453,80 +133448,370 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #28]\n\t" - "# A[0] * A[8]\n\t" + /* A[0] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[7]\n\t" + /* A[1] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, 
r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[4]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[4] */ "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul 
r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #32]\n\t" - "# A[0] * A[9]\n\t" + /* A[0] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[8]\n\t" + /* A[1] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl 
r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + 
"mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[5] */ "ldr r10, [%[a], #20]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -62534,94 +133819,446 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #36]\n\t" - "# A[0] * A[10]\n\t" + /* A[0] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[9]\n\t" + /* A[1] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, 
r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, 
r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[5]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[5] */ "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [sp, #40]\n\t" - "# A[0] * A[11]\n\t" + /* A[0] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, 
#16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[10]\n\t" + /* A[1] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs 
r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[6] */ "ldr r10, [%[a], #24]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -62629,108 +134266,522 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #44]\n\t" - "# A[0] * A[12]\n\t" + /* A[0] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #0]\n\t" - 
"umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[11]\n\t" + /* A[1] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr 
r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[6]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[6] */ "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, 
r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #48]\n\t" - "# A[0] * A[13]\n\t" + /* A[0] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[12]\n\t" + /* A[1] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, 
r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + 
"mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, 
#16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[7] */ "ldr r10, [%[a], #28]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -62738,122 +134789,598 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, 
[sp, #52]\n\t" - "# A[0] * A[14]\n\t" + /* A[0] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[13]\n\t" + /* A[1] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #16]\n\t" - 
"umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[8] */ "ldr 
r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[7]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[7] */ "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [sp, #56]\n\t" - "# A[0] * A[15]\n\t" + /* 
A[0] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #0]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[1] * A[14]\n\t" + /* A[1] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[2] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[2] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && 
(WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr 
r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, 
[%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[8] */ "ldr r10, [%[a], #32]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs 
r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -62861,115 +135388,560 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [sp, #60]\n\t" - "# A[1] * A[15]\n\t" + /* A[1] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #4]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[2] * A[14]\n\t" + /* A[2] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[3] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, 
r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[3] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[4] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull 
r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[8]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[8] */ "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, 
r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #64]\n\t" - "# A[2] * A[15]\n\t" + /* A[2] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #8]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[3] * A[14]\n\t" + /* A[3] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[4] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" +#endif + /* A[4] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + 
"adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc 
r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[9] */ "ldr r10, [%[a], #36]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -62977,101 +135949,484 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #68]\n\t" - "# A[3] * A[15]\n\t" + /* A[3] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #12]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, 
r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[4] * A[14]\n\t" + /* A[4] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[5] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[5] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, 
r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl 
r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[9]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[9] */ "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds 
r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #72]\n\t" - "# A[4] * A[15]\n\t" + /* A[4] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #16]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[5] * A[14]\n\t" + /* A[5] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[6] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs 
r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[6] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" 
+ "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[10] */ "ldr r10, [%[a], #40]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, 
r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -63079,87 +136434,408 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #76]\n\t" - "# A[5] * A[15]\n\t" + /* A[5] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #20]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[6] * A[14]\n\t" + /* A[6] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[7] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc 
r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[7] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, 
#16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[10]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[10] */ "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r5, r5, 
r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif "adds r4, r4, r5\n\t" "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #80]\n\t" - "# A[6] * A[15]\n\t" + /* A[6] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #24]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[7] * A[14]\n\t" + /* A[7] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[8] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul 
r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[8] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, 
#0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[11] */ "ldr r10, [%[a], #44]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -63167,73 +136843,332 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #84]\n\t" - "# A[7] * A[15]\n\t" + /* A[7] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #28]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, 
r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[8] * A[14]\n\t" + /* A[8] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[9] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[9] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[12]\n\t" + "adc r7, r7, 
#0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[11]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[11] */ "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, 
r9\n\t" + "adc r2, r2, #0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#endif "adds r3, r3, r5\n\t" "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #88]\n\t" - "# A[8] * A[15]\n\t" + /* A[8] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #32]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r3, #0\n\t" "mov r7, #0\n\t" - "# A[9] * A[14]\n\t" + /* A[9] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[10] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl 
r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[10] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[12] */ "ldr r10, [%[a], #48]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, 
r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -63241,59 +137176,256 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r2, r2, r6\n\t" "adc r3, r3, r7\n\t" "str r4, [%[r], #92]\n\t" - "# A[9] * A[15]\n\t" + /* A[9] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #36]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r4, #0\n\t" "mov r7, #0\n\t" - "# A[10] * A[14]\n\t" + /* A[10] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[11] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, 
r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[11] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[12] * A[12]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[12] */ "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, 
#0\n\t" + "adds r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adc r7, r7, r7\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" - "adds r5, r5, r8\n\t" - "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "adds r2, r2, r5\n\t" "adcs r3, r3, r6\n\t" "adc r4, r4, r7\n\t" "str r2, [%[r], #96]\n\t" - "# A[10] * A[15]\n\t" + /* A[10] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #40]\n\t" - "umull r5, r6, r10, r8\n\t" + "ldr r12, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r5, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r8, r5\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r6, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, #0\n\t" + "lsr r8, r10, #16\n\t" + "mul r9, r8, r9\n\t" + "add r6, r6, r9\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adc r6, r6, r9\n\t" +#else + "umull r5, r6, r10, r12\n\t" +#endif "mov r2, #0\n\t" "mov r7, #0\n\t" - "# A[11] * A[14]\n\t" + /* A[11] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" - "# A[12] * A[13]\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + 
"lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif + /* A[12] * A[13] */ "ldr r10, [%[a], #52]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r5, r5, r9\n\t" + "adcs r6, r6, #0\n\t" + "adc r7, r7, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r5, r5, r8\n\t" "adcs r6, r6, r9\n\t" - "adc r7, r7, r12\n\t" + "adc r7, r7, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r5, r5, r8\n\t" + "adcs r6, r6, r9\n\t" + "adc r7, r7, #0\n\t" +#endif "adds r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" "adc r7, r7, r7\n\t" @@ -63301,87 +137433,415 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "adcs r4, r4, r6\n\t" "adc r2, r2, r7\n\t" "str r3, [%[r], #100]\n\t" - "# A[11] * A[15]\n\t" + /* A[11] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #44]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc 
r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[12] * A[14]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[12] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" - "# A[13] * A[13]\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, 
#16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#endif + /* A[13] * A[13] */ "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #104]\n\t" - "# A[12] * A[15]\n\t" + /* A[12] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #48]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r12, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" - "# A[13] * A[14]\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc 
r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif + /* A[13] * A[14] */ "ldr r10, [%[a], #56]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" "adds r2, r2, r8\n\t" "adcs r3, r3, r9\n\t" - "adc r4, r4, r12\n\t" + "adc r4, r4, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" + "adds r2, r2, r8\n\t" + "adcs r3, r3, r9\n\t" + "adc r4, r4, #0\n\t" +#endif "str r2, [%[r], #108]\n\t" - "# A[13] * A[15]\n\t" + /* A[13] * A[15] */ "ldr r10, [%[a], 
#60]\n\t" - "ldr r8, [%[a], #52]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, #0\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r12, r12\n\t" + "adc r2, r2, #0\n\t" "adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" - "# A[14] * A[14]\n\t" + "adc r2, r2, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "mov r2, #0\n\t" + "adc r2, r2, #0\n\t" +#endif + /* A[14] * A[14] */ "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r3, r3, r8\n\t" + "adcs r4, r4, r9\n\t" + "adc r2, r2, #0\n\t" +#else "umull r8, r9, r10, r10\n\t" 
"adds r3, r3, r8\n\t" "adcs r4, r4, r9\n\t" - "adc r2, r2, r12\n\t" + "adc r2, r2, #0\n\t" +#endif "str r3, [%[r], #112]\n\t" - "# A[14] * A[15]\n\t" + /* A[14] * A[15] */ "ldr r10, [%[a], #60]\n\t" - "ldr r8, [%[a], #56]\n\t" - "umull r8, r9, r10, r8\n\t" + "ldr r12, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsl r9, r12, #16\n\t" + "lsr r8, r8, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r9\n\t" + "adcs r2, r2, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r9, r12, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r12, r12\n\t" + "adc r3, r3, #0\n\t" "adds r4, r4, r8\n\t" "adcs r2, r2, r9\n\t" - "adc r3, r3, r12\n\t" + "adc r3, r3, #0\n\t" + "lsr r8, r10, #16\n\t" + "lsr r9, r12, #16\n\t" + "mul r9, r8, r9\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "lsl r9, r12, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #16\n\t" + "lsl r8, r8, #16\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "adc r3, r3, #0\n\t" +#else + "umull r8, r9, r10, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "adds r4, r4, r8\n\t" + "adcs r2, r2, r9\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r], #116]\n\t" - "# A[15] * A[15]\n\t" + /* A[15] * A[15] */ "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r8, r10, #16\n\t" + "lsr r9, r10, #16\n\t" + "lsr r8, r8, #16\n\t" + "mov r12, r8\n\t" + "mul r8, r12, r8\n\t" + "mov r12, r9\n\t" + "mul r9, r12, r9\n\t" + "adds r2, r2, r8\n\t" + "adc r3, r3, r9\n\t" + "lsr r9, r10, #16\n\t" + "lsl r8, r10, #16\n\t" + "lsr r8, 
r8, #16\n\t" + "mul r8, r9, r8\n\t" + "lsr r9, r8, #15\n\t" + "lsl r8, r8, #17\n\t" + "adds r2, r2, r8\n\t" + "adc r3, r3, r9\n\t" +#else "umull r8, r9, r10, r10\n\t" "adds r2, r2, r8\n\t" "adc r3, r3, r9\n\t" +#endif "str r2, [%[r], #120]\n\t" "str r3, [%[r], #124]\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" @@ -63392,10 +137852,9 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) "stm %[r]!, {r2, r3, r4, r8}\n\t" "ldm sp!, {r2, r3, r4, r8}\n\t" "stm %[r]!, {r2, r3, r4, r8}\n\t" - "sub %[r], %[r], #64\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r8", "r9", "r10", "r8", "r5", "r6", "r7", "r12" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); } @@ -63405,48 +137864,44 @@ static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm 
%[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* Sub b from a into a. (a -= b) @@ -63456,72 +137911,69 @@ static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, */ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "subs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, 
r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r11, r11\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, 
{r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc %[a], r9, r9\n\t" + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); - - return c; + return (uint32_t)(size_t)a; } /* Add b to a into r. (r = a + b) @@ -63530,76 +137982,72 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adds r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, 
r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "adcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "adc %[c], r14, r14\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "adc %[r], r12, r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12" ); - - return c; + return (uint32_t)(size_t)r; } /* AND m into each word of a and store in r. 
@@ -63677,47 +138125,43 @@ SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" /* first word starts the borrow chain; every later word uses sbcs */ "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" /* r6 - r6 - borrow: r becomes 0, or -1 if the subtraction borrowed */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", 
"r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc" /* "cc": subs/sbcs rewrite the condition flags */ ); - - return c; + return (uint32_t)(size_t)r; } /* Square a and put result in r. (r = a * a) @@ -63766,54 +138210,81 @@ SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" + "sub sp, sp, #0x100\n\t" "mov r5, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "\n" + "L_sp_1024_mul_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, #0\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r12, [%[b], r4]\n\t" - "umull r9, r10, r14, r12\n\t" + "\n" + "L_sp_1024_mul_32_inner_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[b], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" + "cmp r3, #0x80\n\t" + "beq L_sp_1024_mul_32_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_1024_mul_32_inner_%=\n\t" + "\n" + "L_sp_1024_mul_32_inner_done_%=: \n\t" "str 
r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf8\n\t" + "ble L_sp_1024_mul_32_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r3, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r3, [%[r], #12]\n\t" - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_1024_mul_32_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + "bgt L_sp_1024_mul_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11" ); } @@ -63825,77 +138296,132 @@ static void sp_1024_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "sub sp, sp, #256\n\t" + "sub sp, sp, #0x100\n\t" "mov r12, #0\n\t" "mov r6, #0\n\t" "mov r7, #0\n\t" "mov r8, #0\n\t" "mov r5, #0\n\t" - "\n1:\n\t" - "subs r3, r5, #124\n\t" + "\n" + "L_sp_1024_sqr_32_outer_%=: \n\t" + "subs r3, r5, #0x7c\n\t" "it cc\n\t" "movcc r3, r12\n\t" "sub r4, r5, r3\n\t" - "\n2:\n\t" + "\n" + "L_sp_1024_sqr_32_inner_%=: \n\t" "cmp r4, r3\n\t" - "beq 4f\n\t" - "ldr r14, [%[a], r3]\n\t" - "ldr r9, [%[a], r4]\n\t" - "umull r9, r10, r14, r9\n\t" + "beq L_sp_1024_sqr_32_op_sqr_%=\n\t" + "ldr lr, [%[a], r3]\n\t" + "ldr r11, [%[a], r4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsl r10, r11, #16\n\t" + "lsr r9, r9, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r10\n\t" + "adcs r7, r7, #0\n\t" + "adc 
r8, r8, #0\n\t" + "lsr r10, r11, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" + "adc r8, r8, #0\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "bal 5f\n\t" - "\n4:\n\t" - "ldr r14, [%[a], r3]\n\t" - "umull r9, r10, r14, r14\n\t" + "adc r8, r8, #0\n\t" + "lsr r9, lr, #16\n\t" + "lsr r10, r11, #16\n\t" + "mul r10, r9, r10\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsl r10, r11, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #16\n\t" + "lsl r9, r9, #16\n\t" "adds r6, r6, r9\n\t" "adcs r7, r7, r10\n\t" - "adc r8, r8, r12\n\t" - "\n5:\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, r11\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "bal L_sp_1024_sqr_32_op_done_%=\n\t" + "\n" + "L_sp_1024_sqr_32_op_sqr_%=: \n\t" + "ldr lr, [%[a], r3]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r9, lr, #16\n\t" + "lsr r10, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mov r11, r9\n\t" + "mul r9, r11, r9\n\t" + "mov r11, r10\n\t" + "mul r10, r11, r10\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" + "lsr r10, lr, #16\n\t" + "lsl r9, lr, #16\n\t" + "lsr r9, r9, #16\n\t" + "mul r9, r10, r9\n\t" + "lsr r10, r9, #15\n\t" + "lsl r9, r9, #17\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#else + "umull r9, r10, lr, lr\n\t" + "adds r6, r6, r9\n\t" + "adcs r7, r7, r10\n\t" + "adc r8, r8, #0\n\t" +#endif + "\n" + "L_sp_1024_sqr_32_op_done_%=: \n\t" "add r3, r3, #4\n\t" "sub r4, r4, #4\n\t" - "cmp r3, #128\n\t" - "beq 3f\n\t" + "cmp r3, #0x80\n\t" + "beq L_sp_1024_sqr_32_inner_done_%=\n\t" "cmp r3, r4\n\t" 
- "bgt 3f\n\t" + "bgt L_sp_1024_sqr_32_inner_done_%=\n\t" "cmp r3, r5\n\t" - "ble 2b\n\t" - "\n3:\n\t" + "ble L_sp_1024_sqr_32_inner_%=\n\t" + "\n" + "L_sp_1024_sqr_32_inner_done_%=: \n\t" "str r6, [sp, r5]\n\t" "mov r6, r7\n\t" "mov r7, r8\n\t" "mov r8, #0\n\t" "add r5, r5, #4\n\t" - "cmp r5, #248\n\t" - "ble 1b\n\t" + "cmp r5, #0xf8\n\t" + "ble L_sp_1024_sqr_32_outer_%=\n\t" "str r6, [sp, r5]\n\t" - "\n4:\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r6, [sp, #0]\n\t" - "ldr r7, [sp, #4]\n\t" - "ldr r8, [sp, #8]\n\t" - "ldr r9, [sp, #12]\n\t" - "str r6, [%[r], #0]\n\t" - "str r7, [%[r], #4]\n\t" - "str r8, [%[r], #8]\n\t" - "str r9, [%[r], #12]\n\t" -#else - "ldrd r6, r7, [sp, #0]\n\t" - "ldrd r8, r9, [sp, #8]\n\t" - "strd r6, r7, [%[r], #0]\n\t" - "strd r8, r9, [%[r], #8]\n\t" -#endif - "add sp, sp, #16\n\t" - "add %[r], %[r], #16\n\t" + "\n" + "L_sp_1024_sqr_32_store_%=: \n\t" + "ldm sp!, {r6, r7, r8, r9}\n\t" + "stm %[r]!, {r6, r7, r8, r9}\n\t" "subs r5, r5, #16\n\t" - "bgt 4b\n\t" - : [r] "+r" (r) - : [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r9", "r12" + "bgt L_sp_1024_sqr_32_store_%=\n\t" + : [r] "+r" (r), [a] "+r" (a) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "lr", "r11", "r12" ); } @@ -63993,32 +138519,33 @@ static const sp_point_1024 p1024_base = { */ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r14, #0\n\t" - "add r12, %[a], #128\n\t" - "\n1:\n\t" - "subs %[c], r14, %[c]\n\t" - "ldm %[a], {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r10, #0\n\t" /* r10 = 0, the zero operand for the borrow arithmetic below */ + "mov r12, #0\n\t" /* r12 = borrow saved between iterations (0 or -1); none yet */ + "add lr, %[a], #0x80\n\t" /* lr = a + 0x80 bytes, the loop end address */ + "\n" + "L_sp_1024_sub_in_place_32_word_%=: \n\t" + "subs r12, r10, r12\n\t" /* 0 - saved borrow: puts the saved borrow back into the carry flag */ + "ldm %[a], {r2, r3, r4, r5}\n\t" + "ldm %[b]!, {r6, r7, r8, r9}\n\t" + "sbcs r2, r2, r6\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs 
r7, r7, r11\n\t" - "stm %[a]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r14, r14\n\t" - "cmp %[a], r12\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "sbc r12, r10, r10\n\t" /* save the borrow as 0 or -1 before cmp overwrites the flags */ + "cmp %[a], lr\n\t" + "bne L_sp_1024_sub_in_place_32_word_%=\n\t" + "mov %[a], r12\n\t" /* the final borrow (0 or -1) is returned through a */ + : [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r12", "lr", "r10", "cc" /* "cc": subs/sbcs/cmp rewrite the condition flags */ ); - - return c; + return (uint32_t)(size_t)a; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Conditionally subtract b from a using the mask m. * m is -1 to subtract and 0 when not copying. * @@ -64027,34 +138554,45 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "mov r9, #0\n\t" - "mov r8, #0\n\t" - "1:\n\t" - "subs %[c], r9, %[c]\n\t" - "ldr r4, [%[a], r8]\n\t" - "ldr r6, [%[b], r8]\n\t" - "and r6, r6, %[m]\n\t" - "sbcs r4, r4, r6\n\t" - "sbc %[c], r9, r9\n\t" - "str r4, [%[r], r8]\n\t" - "add r8, r8, #4\n\t" - "cmp r8, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "mov r6, #0\n\t" + "mov r12, #0\n\t" + "mov lr, #0\n\t" + "\n" + "L_sp_1024_cond_sub_32_words_%=: \n\t" + "subs r12, r6, r12\n\t" + "ldr r4, [%[a], lr]\n\t" + "ldr r5, [%[b], lr]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc r12, r6, r6\n\t" + "str r4, [%[r], lr]\n\t" + "add lr, lr, #4\n\t" + "cmp lr, #0x80\n\t" + "blt L_sp_1024_cond_sub_32_words_%=\n\t" + "mov %[r], r12\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" 
(b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); -#else - __asm__ __volatile__ ( + return (uint32_t)(size_t)r; +} - "mov r9, #0\n\t" +#else +/* Conditionally subtract b from a using the mask m. + * m is -1 to subtract and 0 when not copying. + * + * r A single precision number representing condition subtract result. + * a A single precision number to subtract from. + * b A single precision number to subtract. + * m Mask value to apply. + */ +static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) +{ + __asm__ __volatile__ ( + "mov lr, #0\n\t" "ldm %[a]!, {r4, r5}\n\t" "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" @@ -64167,16 +138705,15 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" "stm %[r]!, {r4, r5}\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) - : [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9" + "sbc %[r], lr, lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7" ); -#endif /* WOLFSSL_SP_SMALL */ - - return c; + return (uint32_t)(size_t)r; } +#endif /* WOLFSSL_SP_SMALL */ #ifdef WOLFSSL_SP_SMALL /* Add b to a into r. (r = a + b) * @@ -64184,15 +138721,14 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #128\n\t" - "\n1:\n\t" - "adds %[c], %[c], #-1\n\t" + "mov r3, #0\n\t" + "add r12, %[a], #0x80\n\t" + "\n" + "L_sp_1024_add_32_word_%=: \n\t" + "adds r3, r3, #-1\n\t" "ldm %[a]!, {r4, r5, r6, r7}\n\t" "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" @@ -64201,287 +138737,1395 @@ static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, "adcs r7, r7, r11\n\t" "stm %[r]!, {r4, r5, r6, r7}\n\t" "mov r4, #0\n\t" - "adc %[c], r4, #0\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "adc r3, r4, #0\n\t" + "cmp %[a], r12\n\t" + "bne L_sp_1024_add_32_word_%=\n\t" + "mov %[r], r3\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r3", "r12" ); - - return c; + return (uint32_t)(size_t)r; } #endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_SMALL /* Mul a by digit b into r. (r = a * b) * * r A single precision integer. * a A single precision integer. * b A single precision digit. 
*/ -static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, - sp_digit b) +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) { -#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r5, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r5, r5, #16\n\t" + "mul r5, r6, r5\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r3, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r3, r3, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adc r3, r3, r7\n\t" +#else "umull r5, r3, %[b], r8\n\t" +#endif "mov r4, #0\n\t" "str r5, [%[r]]\n\t" "mov r5, #0\n\t" "mov r9, #4\n\t" - "1:\n\t" + "\n" + "L_sp_1024_mul_d_32_word_%=: \n\t" + /* A[i] * B */ "ldr r8, [%[a], r9]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r5, r10\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r], r9]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" 
"add r9, r9, #4\n\t" - "cmp r9, #128\n\t" - "blt 1b\n\t" + "cmp r9, #0x80\n\t" + "blt L_sp_1024_mul_d_32_word_%=\n\t" "str r3, [%[r], #128]\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); +} + #else +/* Mul a by digit b into r. (r = a * b) + * + * r A single precision integer. + * a A single precision integer. + * b A single precision digit. + */ +static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) +{ __asm__ __volatile__ ( "mov r10, #0\n\t" - "# A[0] * B\n\t" + /* A[0] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r3, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r3, r3, #16\n\t" + "mul r3, r6, r3\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r4, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "mul r7, r6, r7\n\t" + "add r4, r4, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adc r4, r4, r7\n\t" +#else "umull r3, r4, %[b], r8\n\t" +#endif "mov r5, #0\n\t" "str r3, [%[r]], #4\n\t" - "# A[1] * B\n\t" + /* A[1] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, 
#16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[2] * B\n\t" + /* A[2] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[3] * B\n\t" + /* A[3] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl 
r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[4] * B\n\t" + /* A[4] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[5] * B\n\t" + /* A[5] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" 
+ "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[6] * B\n\t" + /* A[6] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[7] * B\n\t" + /* A[7] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[8] * B\n\t" + /* A[8] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[9] * B\n\t" + /* A[9] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[10] * B\n\t" + /* A[10] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[11] * B\n\t" + /* A[11] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, 
r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[12] * B\n\t" + /* A[12] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[13] * B\n\t" + /* A[13] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + 
"adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[14] * B\n\t" + /* A[14] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[15] * B\n\t" + /* A[15] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
#0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[16] * B\n\t" + /* A[16] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[17] * B\n\t" + /* A[17] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" 
+#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[18] * B\n\t" + /* A[18] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[19] * B\n\t" + /* A[19] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull 
r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[20] * B\n\t" + /* A[20] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[21] * B\n\t" + /* A[21] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], 
r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[22] * B\n\t" + /* A[22] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[23] * B\n\t" + /* A[23] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds 
r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[24] * B\n\t" + /* A[24] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[25] * B\n\t" + /* A[25] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, 
r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[26] * B\n\t" + /* A[26] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[27] * B\n\t" + /* A[27] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs 
r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[28] * B\n\t" + /* A[28] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adcs r5, r5, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" - "adc r3, r10, r10\n\t" + "mov r3, #0\n\t" + "adc r3, r3, #0\n\t" +#endif "str r4, [%[r]], #4\n\t" - "# A[29] * B\n\t" + /* A[29] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r3, r3, #0\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, 
r7\n\t" - "adc r4, r10, r10\n\t" + "mov r4, #0\n\t" + "adc r4, r4, #0\n\t" +#endif "str r5, [%[r]], #4\n\t" - "# A[30] * B\n\t" + /* A[30] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r3, r3, r7\n\t" + "adcs r4, r4, #0\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" - "adc r5, r10, r10\n\t" + "mov r5, #0\n\t" + "adc r5, r5, #0\n\t" +#endif "str r3, [%[r]], #4\n\t" - "# A[31] * B\n\t" + /* A[31] * B */ "ldr r8, [%[a]], #4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, %[b], #16\n\t" + "lsl r7, r8, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r8, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" + "lsr r6, %[b], #16\n\t" + "lsr r7, r8, #16\n\t" + "mul r7, r6, r7\n\t" + "add r5, r5, r7\n\t" + "lsl r7, r8, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" +#endif "str r4, [%[r]], #4\n\t" "str r5, [%[r]]\n\t" - : [r] "+r" (r), [a] "+r" (a) - : [b] "r" (b) - : "memory", "r3", 
"r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10" ); -#endif } +#endif /* WOLFSSL_SP_SMALL */ +#ifdef WOLFSSL_SP_USE_UDIV /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) * * d1 The high order half of the number to divide. @@ -64493,57 +140137,184 @@ static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, */ static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) { - sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r5, %[div], #1\n\t" - "add r5, r5, #1\n\t" - "mov r6, %[d0]\n\t" - "mov r7, %[d1]\n\t" - "# Do top 32\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "# Next 30 bits\n\t" - "mov r4, #29\n\t" - "1:\n\t" - "movs r6, r6, lsl #1\n\t" - "adc r7, r7, r7\n\t" - "subs r8, r5, r7\n\t" - "sbc r8, r8, r8\n\t" - "add %[r], %[r], %[r]\n\t" - "sub %[r], %[r], r8\n\t" - "and r8, r8, r5\n\t" - "subs r7, r7, r8\n\t" - "subs r4, r4, #1\n\t" - "bpl 1b\n\t" - "add %[r], %[r], %[r]\n\t" - "add %[r], %[r], #1\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "umull r4, r5, %[r], %[div]\n\t" - "subs r4, %[d0], r4\n\t" - "sbc r5, %[d1], r5\n\t" - "add %[r], %[r], r5\n\t" - "subs r8, %[div], r4\n\t" - "sbc r8, r8, r8\n\t" - "sub %[r], %[r], r8\n\t" - : [r] "+r" (r) - : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7", "r8" + "lsr r6, %[div], #16\n\t" + "add lr, r6, #1\n\t" + "udiv r4, %[d1], lr\n\t" + "lsl r5, %[div], #16\n\t" + "lsl r4, r4, #16\n\t" + "umull r3, r12, %[div], r4\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "subs r3, %[d1], lr\n\t" + "sbc r7, r7, r7\n\t" + "add r7, r7, #1\n\t" + "rsb r8, 
r7, #0\n\t" + "lsl r7, r7, #16\n\t" + "and r5, r5, r8\n\t" + "and r6, r6, r8\n\t" + "subs %[d0], %[d0], r5\n\t" + "add r4, r4, r7\n\t" + "sbc %[d1], %[d1], r6\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "umull r3, r12, %[div], r3\n\t" + "subs %[d0], %[d0], r3\n\t" + "sbc %[d1], %[d1], r12\n\t" + "lsl r12, %[d1], #16\n\t" + "lsr r3, %[d0], #16\n\t" + "orr r3, r3, r12\n\t" + "udiv r3, r3, lr\n\t" + "add r4, r4, r3\n\t" + "mul r3, %[div], r3\n\t" + "sub %[d0], %[d0], r3\n\t" + "udiv r3, %[d0], %[div]\n\t" + "add %[d1], r4, r3\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" ); - return r; + return (uint32_t)(size_t)d1; } +#else +/* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div) + * + * d1 The high order half of the number to divide. + * d0 The low order half of the number to divide. + * div The divisor. + * returns the result of the division. + * + * Note that this is an approximate div. It may give an answer 1 larger. 
+ */ +static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0, sp_digit div) +{ + __asm__ __volatile__ ( + "lsr lr, %[div], #1\n\t" + "add lr, lr, #1\n\t" + "mov r4, %[d0]\n\t" + "mov r5, %[d1]\n\t" + /* Do top 32 */ + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "mov r3, #0\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + /* Next 30 bits */ + "mov r12, #29\n\t" + "\n" + "L_div_1024_word_32_bit_%=: \n\t" + "lsls r4, r4, #1\n\t" + "adc r5, r5, r5\n\t" + "subs r6, lr, r5\n\t" + "sbc r6, r6, r6\n\t" + "add r3, r3, r3\n\t" + "sub r3, r3, r6\n\t" + "and r6, r6, lr\n\t" + "subs r5, r5, r6\n\t" + "subs r12, r12, #1\n\t" + "bpl L_div_1024_word_32_bit_%=\n\t" + "add r3, r3, r3\n\t" + "add r3, r3, #1\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, 
r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r7, r3, #16\n\t" + "lsl r4, %[div], #16\n\t" + "lsr r7, r7, #16\n\t" + "lsr r4, r4, #16\n\t" + "mul r4, r7, r4\n\t" + "lsr r8, %[div], #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r5, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r3, #16\n\t" + "mul r8, r7, r8\n\t" + "add r5, r5, r8\n\t" + "lsl r8, %[div], #16\n\t" + "lsr r8, r8, #16\n\t" + "mul r7, r8, r7\n\t" + "lsr r8, r7, #16\n\t" + "lsl r7, r7, #16\n\t" + "adds r4, r4, r7\n\t" + "adc r5, r5, r8\n\t" +#else + "umull r4, r5, r3, %[div]\n\t" +#endif + "subs r7, %[d0], r4\n\t" + "sbc r8, %[d1], r5\n\t" + "add r3, r3, r8\n\t" + "subs r6, %[div], r7\n\t" + "sbc r6, r6, r6\n\t" + "sub %[d1], r3, r6\n\t" + : [d1] "+r" (d1), [d0] "+r" (d0), [div] "+r" (div) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8" + ); + return (uint32_t)(size_t)d1; +} + +#endif /* AND m into each word of a and store in r. * * r A single precision integer. 
@@ -64583,398 +140354,390 @@ static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m) */ static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b) { - sp_digit r = -1; - sp_digit one = 1; - - + __asm__ __volatile__ ( + "mov r2, #-1\n\t" + "mov r6, #1\n\t" + "mov r5, #0\n\t" + "mov r3, #-1\n\t" #ifdef WOLFSSL_SP_SMALL - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "mov r6, #124\n\t" - "1:\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "mov r4, #0x7c\n\t" + "\n" + "L_sp_1024_cmp_32_words_%=: \n\t" + "ldr r12, [%[a], r4]\n\t" + "ldr lr, [%[b], r4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "subs r6, r6, #4\n\t" - "bcs 1b\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" - ); + "movne r3, r5\n\t" + "subs r4, r4, #4\n\t" + "bcs L_sp_1024_cmp_32_words_%=\n\t" + "eor r2, r2, r3\n\t" #else - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r3, #-1\n\t" - "ldr r4, [%[a], #124]\n\t" - "ldr r5, [%[b], #124]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "ldr r12, [%[a], #124]\n\t" + "ldr lr, [%[b], #124]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[b], #120]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #120]\n\t" + "ldr lr, [%[b], #120]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, 
r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #116]\n\t" - "ldr r5, [%[b], #116]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #116]\n\t" + "ldr lr, [%[b], #116]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[b], #112]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #112]\n\t" + "ldr lr, [%[b], #112]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #108]\n\t" - "ldr r5, [%[b], #108]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #108]\n\t" + "ldr lr, [%[b], #108]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[b], #104]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #104]\n\t" + "ldr lr, [%[b], #104]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #100]\n\t" - "ldr r5, [%[b], #100]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #100]\n\t" 
+ "ldr lr, [%[b], #100]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[b], #96]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #96]\n\t" + "ldr lr, [%[b], #96]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #92]\n\t" - "ldr r5, [%[b], #92]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #92]\n\t" + "ldr lr, [%[b], #92]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[b], #88]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #88]\n\t" + "ldr lr, [%[b], #88]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #84]\n\t" - "ldr r5, [%[b], #84]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #84]\n\t" + "ldr lr, [%[b], #84]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #80]\n\t" - "ldr r5, 
[%[b], #80]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #80]\n\t" + "ldr lr, [%[b], #80]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #76]\n\t" - "ldr r5, [%[b], #76]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #76]\n\t" + "ldr lr, [%[b], #76]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[b], #72]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #72]\n\t" + "ldr lr, [%[b], #72]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #68]\n\t" - "ldr r5, [%[b], #68]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #68]\n\t" + "ldr lr, [%[b], #68]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[b], #64]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #64]\n\t" + "ldr lr, [%[b], #64]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, 
r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #60]\n\t" - "ldr r5, [%[b], #60]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #60]\n\t" + "ldr lr, [%[b], #60]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[b], #56]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #56]\n\t" + "ldr lr, [%[b], #56]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #52]\n\t" - "ldr r5, [%[b], #52]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #52]\n\t" + "ldr lr, [%[b], #52]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[b], #48]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #48]\n\t" + "ldr lr, [%[b], #48]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #44]\n\t" - "ldr r5, [%[b], #44]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #44]\n\t" + "ldr lr, [%[b], 
#44]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[b], #40]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #40]\n\t" + "ldr lr, [%[b], #40]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #36]\n\t" - "ldr r5, [%[b], #36]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #36]\n\t" + "ldr lr, [%[b], #36]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[b], #32]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #32]\n\t" + "ldr lr, [%[b], #32]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #28]\n\t" - "ldr r5, [%[b], #28]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #28]\n\t" + "ldr lr, [%[b], #28]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[b], #24]\n\t" - 
"and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #24]\n\t" + "ldr lr, [%[b], #24]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #20]\n\t" - "ldr r5, [%[b], #20]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #20]\n\t" + "ldr lr, [%[b], #20]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[b], #16]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #16]\n\t" + "ldr lr, [%[b], #16]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #12]\n\t" - "ldr r5, [%[b], #12]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #12]\n\t" + "ldr lr, [%[b], #12]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[b], #8]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #8]\n\t" + "ldr lr, [%[b], #8]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - 
"movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #4]\n\t" - "ldr r5, [%[b], #4]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a], #4]\n\t" + "ldr lr, [%[b], #4]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[b], #0]\n\t" - "and r4, r4, r3\n\t" - "and r5, r5, r3\n\t" - "subs r4, r4, r5\n\t" + "movne r3, r5\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[b]]\n\t" + "and r12, r12, r3\n\t" + "and lr, lr, r3\n\t" + "subs r12, r12, lr\n\t" "it hi\n\t" - "movhi %[r], %[one]\n\t" + "movhi r2, r6\n\t" "it lo\n\t" - "movlo %[r], r3\n\t" + "movlo r2, r3\n\t" "it ne\n\t" - "movne r3, r7\n\t" - "eor %[r], %[r], r3\n\t" - : [r] "+r" (r) - : [a] "r" (a), [b] "r" (b), [one] "r" (one) - : "r3", "r4", "r5", "r6", "r7" + "movne r3, r5\n\t" + "eor r2, r2, r3\n\t" +#endif /*WOLFSSL_SP_SMALL */ + "mov %[a], r2\n\t" + : [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r2", "r3", "r12", "lr", "r4", "r5", "r6" ); -#endif - - return r; + return (uint32_t)(size_t)a; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -65305,325 +141068,1171 @@ static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. 
*/ -SP_NOINLINE static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, - sp_digit mp) +static SP_NOINLINE void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit mp) { - sp_digit ca = 0; - __asm__ __volatile__ ( - "ldr r11, [%[m], #0]\n\t" - "# i = 0\n\t" - "mov r12, #0\n\t" - "ldr r10, [%[a], #0]\n\t" - "ldr r14, [%[a], #4]\n\t" - "\n1:\n\t" - "# mu = a[i] * mp\n\t" - "mul r8, %[mp], r10\n\t" - "# a[i+0] += m[0] * mu\n\t" - "ldr r9, [%[a], #0]\n\t" +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4)) + "ldr r11, [%[m]]\n\t" +#endif + /* i = 0 */ + "mov r9, #0\n\t" + "mov r3, #0\n\t" + "ldr r12, [%[a]]\n\t" + "ldr lr, [%[a], #4]\n\t" + "\n" + "L_sp_1024_mont_reduce_32_word_%=: \n\t" + /* mu = a[i] * mp */ + "mul r8, %[mp], r12\n\t" + /* a[i+0] += m[0] * mu */ +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "ldr r11, [%[m]]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r7, r11, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r7\n\t" + "lsl r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r12, r12, r7\n\t" + "adc r5, r5, #0\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r5, r7\n\t" +#else "umull r6, r7, r8, r11\n\t" + "adds r12, r12, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + /* a[i+1] += m[1] * mu */ + "ldr r7, [%[m], #4]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" + "lsl r6, r8, #16\n\t" 
+ "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r4, r4, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r4, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r4, r10, #0\n\t" +#endif + "mov r12, lr\n\t" + "adds r12, r12, r5\n\t" + "adc r4, r4, #0\n\t" + /* a[i+2] += m[2] * mu */ + "ldr r7, [%[m], #8]\n\t" + "ldr lr, [%[a], #8]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r10, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r10\n\t" + "lsl r10, r7, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" + "lsl r6, r8, #16\n\t" + "lsl r10, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r10, r10, #16\n\t" + "mul r10, r6, r10\n\t" + "adds lr, lr, r10\n\t" + "adc r5, r5, #0\n\t" + "lsr r10, r7, #16\n\t" + "mul r6, r10, r6\n\t" + "lsr r10, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r5, r10\n\t" +#else + "umull r6, r10, r8, r7\n\t" + "adds lr, lr, r6\n\t" + "adc r5, r10, #0\n\t" +#endif + "adds lr, lr, r4\n\t" + "adc r5, r5, #0\n\t" + /* a[i+3] += m[3] * mu */ + "ldr r7, [%[m], #12]\n\t" + "ldr r10, [%[a], #12]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + 
"adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r7, #0\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #12]\n\t" + "adc r4, r4, #0\n\t" + /* a[i+4] += m[4] * mu */ + "ldr r7, [%[m], #16]\n\t" + "ldr r10, [%[a], #16]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r5, r5, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "# a[i+1] += m[1] * mu\n\t" - "ldr r7, [%[m], #4]\n\t" - "ldr r9, [%[a], #4]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #16]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+5] += m[5] * mu */ + "ldr r7, [%[m], #20]\n\t" + "ldr r10, [%[a], #20]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r10, r14, r6\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" +#endif "adds 
r10, r10, r5\n\t" + "str r10, [%[a], #20]\n\t" "adc r4, r4, #0\n\t" - "# a[i+2] += m[2] * mu\n\t" - "ldr r7, [%[m], #8]\n\t" - "ldr r14, [%[a], #8]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r14, r14, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r14, r14, r4\n\t" + /* a[i+6] += m[6] * mu */ + "ldr r7, [%[m], #24]\n\t" + "ldr r10, [%[a], #24]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+3] += m[3] * mu\n\t" - "ldr r7, [%[m], #12]\n\t" - "ldr r9, [%[a], #12]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #24]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+7] += m[7] * mu */ + "ldr r7, [%[m], #28]\n\t" + "ldr r10, [%[a], #28]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, 
r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #12]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #28]\n\t" "adc r4, r4, #0\n\t" - "# a[i+4] += m[4] * mu\n\t" - "ldr r7, [%[m], #16]\n\t" - "ldr r9, [%[a], #16]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #16]\n\t" + /* a[i+8] += m[8] * mu */ + "ldr r7, [%[m], #32]\n\t" + "ldr r10, [%[a], #32]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+5] += m[5] * mu\n\t" - "ldr r7, [%[m], #20]\n\t" - "ldr r9, [%[a], #20]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #32]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+9] += m[9] * mu */ + "ldr r7, [%[m], #36]\n\t" + "ldr r10, [%[a], #36]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #20]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #36]\n\t" "adc r4, r4, #0\n\t" - "# a[i+6] += m[6] * mu\n\t" - "ldr r7, [%[m], #24]\n\t" - "ldr r9, [%[a], #24]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #24]\n\t" + /* a[i+10] += m[10] * mu */ + "ldr r7, [%[m], #40]\n\t" + "ldr r10, [%[a], #40]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+7] += m[7] * mu\n\t" - "ldr r7, [%[m], #28]\n\t" - "ldr r9, [%[a], #28]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #40]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+11] += m[11] * mu */ + "ldr r7, [%[m], #44]\n\t" + "ldr r10, [%[a], #44]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr 
r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #28]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #44]\n\t" "adc r4, r4, #0\n\t" - "# a[i+8] += m[8] * mu\n\t" - "ldr r7, [%[m], #32]\n\t" - "ldr r9, [%[a], #32]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #32]\n\t" + /* a[i+12] += m[12] * mu */ + "ldr r7, [%[m], #48]\n\t" + "ldr r10, [%[a], #48]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+9] += m[9] * mu\n\t" - "ldr r7, [%[m], #36]\n\t" - "ldr r9, [%[a], #36]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #48]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+13] += m[13] * mu */ + "ldr r7, [%[m], #52]\n\t" + "ldr r10, [%[a], #52]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr 
r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #36]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #52]\n\t" "adc r4, r4, #0\n\t" - "# a[i+10] += m[10] * mu\n\t" - "ldr r7, [%[m], #40]\n\t" - "ldr r9, [%[a], #40]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #40]\n\t" + /* a[i+14] += m[14] * mu */ + "ldr r7, [%[m], #56]\n\t" + "ldr r10, [%[a], #56]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+11] += m[11] * mu\n\t" - "ldr r7, [%[m], #44]\n\t" - "ldr r9, [%[a], #44]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #56]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+15] += m[15] * mu */ + "ldr r7, [%[m], #60]\n\t" + "ldr r10, [%[a], #60]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + 
"lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #44]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #60]\n\t" "adc r4, r4, #0\n\t" - "# a[i+12] += m[12] * mu\n\t" - "ldr r7, [%[m], #48]\n\t" - "ldr r9, [%[a], #48]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #48]\n\t" + /* a[i+16] += m[16] * mu */ + "ldr r7, [%[m], #64]\n\t" + "ldr r10, [%[a], #64]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+13] += m[13] * mu\n\t" - "ldr r7, [%[m], #52]\n\t" - "ldr r9, [%[a], #52]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #64]\n\t" + "adc r5, r5, #0\n\t" 
+ /* a[i+17] += m[17] * mu */ + "ldr r7, [%[m], #68]\n\t" + "ldr r10, [%[a], #68]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #52]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #68]\n\t" "adc r4, r4, #0\n\t" - "# a[i+14] += m[14] * mu\n\t" - "ldr r7, [%[m], #56]\n\t" - "ldr r9, [%[a], #56]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #56]\n\t" + /* a[i+18] += m[18] * mu */ + "ldr r7, [%[m], #72]\n\t" + "ldr r10, [%[a], #72]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+15] += m[15] * mu\n\t" - "ldr r7, [%[m], #60]\n\t" - "ldr r9, [%[a], #60]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #72]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+19] += m[19] * mu */ + "ldr r7, [%[m], #76]\n\t" + "ldr r10, [%[a], #76]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #60]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #76]\n\t" "adc r4, r4, #0\n\t" - "# a[i+16] += m[16] * mu\n\t" - "ldr r7, [%[m], #64]\n\t" - "ldr r9, [%[a], #64]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #64]\n\t" + /* a[i+20] += m[20] * mu */ + "ldr r7, [%[m], #80]\n\t" + "ldr r10, [%[a], #80]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+17] += m[17] * mu\n\t" - "ldr r7, [%[m], #68]\n\t" - "ldr r9, [%[a], #68]\n\t" + "lsr r11, r7, #16\n\t" + "mul 
r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #80]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+21] += m[21] * mu */ + "ldr r7, [%[m], #84]\n\t" + "ldr r10, [%[a], #84]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #68]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #84]\n\t" "adc r4, r4, #0\n\t" - "# a[i+18] += m[18] * mu\n\t" - "ldr r7, [%[m], #72]\n\t" - "ldr r9, [%[a], #72]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #72]\n\t" + /* a[i+22] += m[22] * mu */ + "ldr r7, [%[m], #88]\n\t" + "ldr r10, [%[a], #88]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, 
r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+19] += m[19] * mu\n\t" - "ldr r7, [%[m], #76]\n\t" - "ldr r9, [%[a], #76]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #88]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+23] += m[23] * mu */ + "ldr r7, [%[m], #92]\n\t" + "ldr r10, [%[a], #92]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #76]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #92]\n\t" "adc r4, r4, #0\n\t" - "# a[i+20] += m[20] * mu\n\t" - "ldr r7, [%[m], #80]\n\t" - "ldr r9, [%[a], #80]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #80]\n\t" + /* a[i+24] += m[24] * mu */ + "ldr r7, [%[m], #96]\n\t" + "ldr r10, [%[a], #96]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc 
r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+21] += m[21] * mu\n\t" - "ldr r7, [%[m], #84]\n\t" - "ldr r9, [%[a], #84]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #96]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+25] += m[25] * mu */ + "ldr r7, [%[m], #100]\n\t" + "ldr r10, [%[a], #100]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #84]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #100]\n\t" "adc r4, r4, #0\n\t" - "# a[i+22] += m[22] * mu\n\t" - "ldr r7, [%[m], #88]\n\t" - "ldr r9, [%[a], #88]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #88]\n\t" + /* a[i+26] += m[26] * mu */ + "ldr r7, [%[m], #104]\n\t" + "ldr r10, [%[a], #104]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" 
+ "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+23] += m[23] * mu\n\t" - "ldr r7, [%[m], #92]\n\t" - "ldr r9, [%[a], #92]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #104]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+27] += m[27] * mu */ + "ldr r7, [%[m], #108]\n\t" + "ldr r10, [%[a], #108]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #92]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #108]\n\t" "adc r4, r4, #0\n\t" - "# a[i+24] += m[24] * mu\n\t" - "ldr r7, [%[m], #96]\n\t" - "ldr r9, [%[a], #96]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #96]\n\t" + /* a[i+28] += m[28] * mu */ + "ldr r7, [%[m], #112]\n\t" + "ldr 
r10, [%[a], #112]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+25] += m[25] * mu\n\t" - "ldr r7, [%[m], #100]\n\t" - "ldr r9, [%[a], #100]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r7, #0\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #112]\n\t" + "adc r5, r5, #0\n\t" + /* a[i+29] += m[29] * mu */ + "ldr r7, [%[m], #116]\n\t" + "ldr r10, [%[a], #116]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r4, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" + "adc r4, r4, #0\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r4, r4, r11\n\t" +#else + "umull r6, r7, r8, r7\n\t" + "adds r10, r10, r6\n\t" "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #100]\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #116]\n\t" "adc r4, r4, #0\n\t" - "# a[i+26] += m[26] * mu\n\t" - "ldr r7, [%[m], #104]\n\t" - "ldr r9, [%[a], #104]\n\t" - "umull r6, r7, r8, r7\n\t" - 
"adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #104]\n\t" + /* a[i+30] += m[30] * mu */ + "ldr r7, [%[m], #120]\n\t" + "ldr r10, [%[a], #120]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsr r11, r7, #16\n\t" + "lsr r6, r8, #16\n\t" + "mul r5, r6, r11\n\t" + "lsl r11, r7, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" + "lsl r6, r8, #16\n\t" + "lsl r11, r7, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r11, r11, #16\n\t" + "mul r11, r6, r11\n\t" + "adds r10, r10, r11\n\t" "adc r5, r5, #0\n\t" - "# a[i+27] += m[27] * mu\n\t" - "ldr r7, [%[m], #108]\n\t" - "ldr r9, [%[a], #108]\n\t" + "lsr r11, r7, #16\n\t" + "mul r6, r11, r6\n\t" + "lsr r11, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r10, r10, r6\n\t" + "adc r5, r5, r11\n\t" +#else "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #108]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+28] += m[28] * mu\n\t" - "ldr r7, [%[m], #112]\n\t" - "ldr r9, [%[a], #112]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" + "adds r10, r10, r6\n\t" "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #112]\n\t" +#endif + "adds r10, r10, r4\n\t" + "str r10, [%[a], #120]\n\t" "adc r5, r5, #0\n\t" - "# a[i+29] += m[29] * mu\n\t" - "ldr r7, [%[m], #116]\n\t" - "ldr r9, [%[a], #116]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r4, r7, #0\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #116]\n\t" - "adc r4, r4, #0\n\t" - "# a[i+30] += m[30] * mu\n\t" - "ldr r7, [%[m], #120]\n\t" - "ldr r9, [%[a], #120]\n\t" - "umull r6, r7, r8, r7\n\t" - "adds r9, r9, r6\n\t" - "adc r5, r7, #0\n\t" - "adds r9, r9, r4\n\t" - "str r9, [%[a], #120]\n\t" - "adc r5, r5, #0\n\t" - "# a[i+31] += m[31] * mu\n\t" + /* a[i+31] += m[31] * mu */ +#if !(defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 
4)) "ldr r7, [%[m], #124]\n\t" - "ldr r9, [%[a], #124]\n\t" +#else + "ldr r11, [%[m], #124]\n\t" +#endif + "ldr r10, [%[a], #124]\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 4) + "lsl r6, r8, #16\n\t" + "lsl r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "lsr r7, r7, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r5, r5, r7\n\t" + "adcs r4, r3, #0\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" + "lsr r7, r11, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" + "mov r6, r8\n\t" + "lsr r7, r11, #16\n\t" + "lsr r6, r6, #16\n\t" + "mul r7, r6, r7\n\t" + "adds r4, r4, r7\n\t" + "lsl r7, r11, #16\n\t" + "adc r3, r3, #0\n\t" + "lsr r7, r7, #16\n\t" + "mul r6, r7, r6\n\t" + "lsr r7, r6, #16\n\t" + "lsl r6, r6, #16\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r3, r3, #0\n\t" +#else "umull r6, r7, r8, r7\n\t" "adds r5, r5, r6\n\t" - "adcs r7, r7, %[ca]\n\t" - "mov %[ca], #0\n\t" - "adc %[ca], %[ca], %[ca]\n\t" - "adds r9, r9, r5\n\t" - "str r9, [%[a], #124]\n\t" - "ldr r9, [%[a], #128]\n\t" - "adcs r9, r9, r7\n\t" - "str r9, [%[a], #128]\n\t" - "adc %[ca], %[ca], #0\n\t" - "# i += 1\n\t" + "adcs r4, r7, r3\n\t" + "mov r3, #0\n\t" + "adc r3, r3, r3\n\t" +#endif + "adds r10, r10, r5\n\t" + "str r10, [%[a], #124]\n\t" + "ldr r10, [%[a], #128]\n\t" + "adcs r10, r10, r4\n\t" + "str r10, [%[a], #128]\n\t" + "adc r3, r3, #0\n\t" + /* i += 1 */ + "add r9, r9, #4\n\t" "add %[a], %[a], #4\n\t" - "add r12, r12, #4\n\t" - "cmp r12, #128\n\t" - "blt 1b\n\t" - "str r10, [%[a], #0]\n\t" - "str r14, [%[a], #4]\n\t" + "cmp r9, #0x80\n\t" + "blt L_sp_1024_mont_reduce_32_word_%=\n\t" + "str r12, [%[a]]\n\t" + "str lr, [%[a], #4]\n\t" "ldr r6, [%[m], #124]\n\t" - "subs r9, r6, r9\n\t" - "neg %[ca], %[ca]\n\t" - "sbc r9, r9, r9\n\t" - "orr %[ca], %[ca], r9\n\t" - : [ca] "+r" (ca), [a] "+r" (a) - : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", 
"r9", "r10", "r14", "r12", "r11" + "subs r10, r6, r10\n\t" + "neg r3, r3\n\t" + "sbc r10, r10, r10\n\t" + "orr r3, r3, r10\n\t" + "mov %[mp], r3\n\t" + : [a] "+r" (a), [m] "+r" (m), [mp] "+r" (mp) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); - - sp_1024_cond_sub_32(a - 32, a, m, ca); + sp_1024_cond_sub_32(a - 32, a, m, mp); } /* Multiply two Montgomery form numbers mod the modulus (prime). @@ -65768,166 +142377,164 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, * b Second number to add in Montgomery form. * m Modulus (prime). */ -static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - 
"adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldr r14, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r14, r14, r7\n\t" - "neg r12, r12\n\t" - "sbc r14, r14, r14\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r14\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, 
r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sub %[r], %[r], #128\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, 
r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldr r11, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r11, r11, r7\n\t" + "neg r12, r12\n\t" + "sbc r11, r11, r11\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r11\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, 
r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -65940,146 +142547,145 @@ static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, 
r14}\n\t" - "adds r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldr r4, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r4, r4, r14\n\t" - "neg r12, r12\n\t" - "sbc r4, r4, r4\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r4\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, 
r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sub %[r], %[r], #128\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs 
r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldr r4, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r4, r4, r11\n\t" + "neg r12, r12\n\t" + "sbc r4, r4, r4\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r4\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + 
"and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12" ); } @@ -66092,301 +142698,300 @@ static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* 
a, const sp_digit* static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adds r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adcs r10, r10, r10\n\t" - "adcs r14, r14, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t" - "ldr r4, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r4, r4, r14\n\t" - "neg r12, r12\n\t" - "sbc r4, r4, r4\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r4\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - 
"and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" 
- "sub %[r], %[r], #128\n\t" - "sub %[m], %[m], #128\n\t" - "sub %[a], %[a], #128\n\t" - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adds r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "adcs r8, r8, r4\n\t" - "adcs r9, r9, r5\n\t" - "adcs r10, r10, r6\n\t" - "adcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldr r7, [%[m], #124]\n\t" - "adc r12, r12, #0\n\t" - "subs r7, r7, r14\n\t" - "neg r12, r12\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], #128\n\t" - "orr r12, r7\n\t" - "ldm %[r], {r8, r9, r10, 
r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "subs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, 
r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbcs r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "ldm %[r], {r8, r9, r10, r14}\n\t" - "ldm %[m]!, {r4, r5, r6, r7}\n\t" - "and r4, r4, r12\n\t" - "and r5, r5, r12\n\t" - "and r6, r6, r12\n\t" - "and r7, r7, r12\n\t" - "sbcs r8, r8, r4\n\t" - "sbcs r9, r9, r5\n\t" - "sbcs r10, r10, r6\n\t" - "sbc r14, r14, r7\n\t" - "stm %[r]!, {r8, r9, r10, r14}\n\t" - "sub %[r], %[r], #128\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adcs r11, r11, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ldr r4, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r4, r4, r11\n\t" + "neg r12, r12\n\t" + "sbc r4, r4, r4\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r4\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + 
"and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm 
%[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sub %[r], %[r], #0x80\n\t" + "sub %[m], %[m], #0x80\n\t" + "sub %[a], %[a], #0x80\n\t" + "mov r12, #0\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adds r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + "adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r8, r9, r10, r11}\n\t" + 
"adcs r8, r8, r4\n\t" + "adcs r9, r9, r5\n\t" + "adcs r10, r10, r6\n\t" + "adcs r11, r11, r7\n\t" + "stm %[r]!, {r8, r9, r10, r11}\n\t" + "ldr r7, [%[m], #124]\n\t" + "adc r12, r12, #0\n\t" + "subs r7, r7, r11\n\t" + "neg r12, r12\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], #0x80\n\t" + "orr r12, r7\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and 
r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r8", "r9", "r10", "r11", "r4", "r5", "r6", "r7", "r12" ); } @@ -66397,161 +143002,158 @@ static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* * b Number to subtract with in Montgomery form. * m Modulus (prime). 
*/ -static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - const sp_digit* m) +static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { __asm__ __volatile__ ( - "mov r12, #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r14}\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc r12, r12, r12\n\t" - "sub %[r], %[r], #128\n\t" - "ldm %[r], {r4, r5, 
r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - 
"adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adcs r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[r], {r4, r5, r6, r7}\n\t" - "ldm %[m]!, {r8, r9, r10, r14}\n\t" - "and r8, r8, r12\n\t" - "and r9, r9, r12\n\t" - "and r10, r10, r12\n\t" - "and r14, r14, r12\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "adc r7, r7, r14\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sub %[r], %[r], #128\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + 
"sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc r12, r12, r12\n\t" + "sub %[r], %[r], #0x80\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + 
"stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[r], {r4, r5, r6, r7}\n\t" + "ldm %[m]!, {r8, r9, r10, r11}\n\t" + "and r8, r8, r12\n\t" + "and r9, r9, r12\n\t" + "and r10, r10, r12\n\t" + "and r11, r11, r12\n\t" + "adcs r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adc r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" ); } @@ -66565,35 +143167,33 @@ static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) { - sp_digit c = 0; - __asm__ __volatile__ ( - "mov r7, #0\n\t" + "mov lr, #0\n\t" "mov r6, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" + "mov r12, #0\n\t" + "\n" + "L_sp_1024_cond_add_32_words_%=: \n\t" + "adds lr, lr, #-1\n\t" + "ldr r4, [%[a], r12]\n\t" + "ldr r5, [%[b], r12]\n\t" "and r5, r5, %[m]\n\t" "adcs r4, r4, r5\n\t" - "adc %[c], r7, r7\n\t" - "str r4, [%[r], r6]\n\t" - "add r6, r6, #4\n\t" - "cmp r6, #128\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7" + "adc lr, r6, r6\n\t" + "str r4, [%[r], r12]\n\t" + "add r12, r12, #4\n\t" + "cmp r12, #0x80\n\t" + "blt L_sp_1024_cond_add_32_words_%=\n\t" + "mov %[r], lr\n\t" + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) + : + : "memory", "r12", "lr", "r4", "r5", "r6" ); - - return c; + return (uint32_t)(size_t)r; } -#endif /* WOLFSSL_SP_SMALL */ -#ifndef WOLFSSL_SP_SMALL +#else /* Conditionally add a and b using the mask m. * m is -1 to add and 0 when not. * @@ -66602,342 +143202,140 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig * b A single precision number to add. * m Mask value to apply. 
*/ -static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) +static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, sp_digit m) /* r = a + (b & m) across 32 words, two words per step, with the carry chained through adds/adcs; the carry out is returned. */ { - sp_digit c = 0; - __asm__ __volatile__ ( "mov r8, #0\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #0]\n\t" - "ldr r5, [%[a], #4]\n\t" - "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" -#else - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" -#endif + "ldm %[a]!, {r4, r5}\n\t" /* '!' writeback advances a (and likewise b and r) by two words per step, replacing the removed fixed-offset ldr/ldrd/str/strd forms */ + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #0]\n\t" - "str r5, [%[r], #4]\n\t" -#else - "strd r4, r5, [%[r], #0]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #8]\n\t" - "ldr r5, [%[a], #12]\n\t" - "ldr r6, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" -#else - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #8]\n\t" - "str r5, [%[r], #12]\n\t" -#else - "strd r4, r5, [%[r], #8]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #16]\n\t" - "ldr r5, [%[a], #20]\n\t" - "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" -#else - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #16]\n\t" - "str r5, [%[r], #20]\n\t" -#else - "strd r4, r5, [%[r], 
#16]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #24]\n\t" - "ldr r5, [%[a], #28]\n\t" - "ldr r6, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" -#else - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #24]\n\t" - "str r5, [%[r], #28]\n\t" -#else - "strd r4, r5, [%[r], #24]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #32]\n\t" - "ldr r5, [%[a], #36]\n\t" - "ldr r6, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" -#else - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #32]\n\t" - "str r5, [%[r], #36]\n\t" -#else - "strd r4, r5, [%[r], #32]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #40]\n\t" - "ldr r5, [%[a], #44]\n\t" - "ldr r6, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" -#else - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #40]\n\t" - "str r5, [%[r], #44]\n\t" -#else - "strd r4, r5, [%[r], #40]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #48]\n\t" - "ldr r5, [%[a], #52]\n\t" - "ldr r6, [%[b], #48]\n\t" - "ldr r7, [%[b], #52]\n\t" -#else - "ldrd 
r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #48]\n\t" - "str r5, [%[r], #52]\n\t" -#else - "strd r4, r5, [%[r], #48]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #56]\n\t" - "ldr r5, [%[a], #60]\n\t" - "ldr r6, [%[b], #56]\n\t" - "ldr r7, [%[b], #60]\n\t" -#else - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #56]\n\t" - "str r5, [%[r], #60]\n\t" -#else - "strd r4, r5, [%[r], #56]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #64]\n\t" - "ldr r5, [%[a], #68]\n\t" - "ldr r6, [%[b], #64]\n\t" - "ldr r7, [%[b], #68]\n\t" -#else - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" -#else - "strd r4, r5, [%[r], #64]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #72]\n\t" - "ldr r5, [%[a], #76]\n\t" - "ldr r6, [%[b], #72]\n\t" - "ldr r7, [%[b], #76]\n\t" -#else - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, 
r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #72]\n\t" - "str r5, [%[r], #76]\n\t" -#else - "strd r4, r5, [%[r], #72]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #80]\n\t" - "ldr r5, [%[a], #84]\n\t" - "ldr r6, [%[b], #80]\n\t" - "ldr r7, [%[b], #84]\n\t" -#else - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #80]\n\t" - "str r5, [%[r], #84]\n\t" -#else - "strd r4, r5, [%[r], #80]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #88]\n\t" - "ldr r5, [%[a], #92]\n\t" - "ldr r6, [%[b], #88]\n\t" - "ldr r7, [%[b], #92]\n\t" -#else - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #88]\n\t" - "str r5, [%[r], #92]\n\t" -#else - "strd r4, r5, [%[r], #88]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #96]\n\t" - "ldr r5, [%[a], #100]\n\t" - "ldr r6, [%[b], #96]\n\t" - "ldr r7, [%[b], #100]\n\t" -#else - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #96]\n\t" - "str r5, [%[r], #100]\n\t" -#else - "strd r4, r5, [%[r], #96]\n\t" -#endif -#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #104]\n\t" - "ldr r5, [%[a], #108]\n\t" - "ldr r6, [%[b], #104]\n\t" - "ldr r7, [%[b], #108]\n\t" -#else - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #104]\n\t" - "str r5, [%[r], #108]\n\t" -#else - "strd r4, r5, [%[r], #104]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #112]\n\t" - "ldr r5, [%[a], #116]\n\t" - "ldr r6, [%[b], #112]\n\t" - "ldr r7, [%[b], #116]\n\t" -#else - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #112]\n\t" - "str r5, [%[r], #116]\n\t" -#else - "strd r4, r5, [%[r], #112]\n\t" -#endif -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r4, [%[a], #120]\n\t" - "ldr r5, [%[a], #124]\n\t" - "ldr r6, [%[b], #120]\n\t" - "ldr r7, [%[b], #124]\n\t" -#else - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" -#endif + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "str r4, [%[r], #120]\n\t" - "str r5, [%[r], #124]\n\t" -#else - "strd r4, r5, [%[r], #120]\n\t" -#endif - "adc %[c], r8, r8\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8" + "stm %[r]!, {r4, r5}\n\t" + "adc %[r], r8, 
r8\n\t" /* r8 was zeroed at the top, so 0 + 0 + C leaves just the carry (0 or 1) in r */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m) /* r, a and b must be read-write: writeback moves the pointers and r is overwritten with the carry */ + : + : "memory", "r12", "lr", "r4", "r5", "r6", "r7", "r8" /* NOTE(review): r12 and lr are declared clobbered although none of the instructions shown here touch them */ ); - - return c; + return (uint32_t)(size_t)r; /* r holds the carry here, no longer a pointer */ } -#endif /* !WOLFSSL_SP_SMALL */ +#endif /* WOLFSSL_SP_SMALL */ static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( -#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) - "ldr r2, [%[a]]\n\t" - "ldr r3, [%[a], #4]\n\t" -#else - "ldrd r2, r3, [%[a]]\n\t" -#endif + "ldm %[a], {r2, r3}\n\t" "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" "ldr r4, [%[a], #8]\n\t" - "str r2, [%[r], #0]\n\t" + "str r2, [%[r]]\n\t" "orr r3, r3, r4, lsl #31\n\t" "lsr r4, r4, #1\n\t" "ldr r2, [%[a], #12]\n\t" @@ -67058,8 +143456,8 @@ static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) "lsr r3, r3, #1\n\t" "str r2, [%[r], #120]\n\t" "str r3, [%[r], #124]\n\t" + : [r] "+r" (r), [a] "+r" (a) : - : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4" ); } @@ -67282,31 +143680,30 @@ static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) /* Looping form: r = a - b across 32 words, four words per pass; returns the borrow as 0 or 0xFFFFFFFF. */ { - sp_digit c = 0; - __asm__ __volatile__ ( - "add r14, %[a], #128\n\t" - "\n1:\n\t" - "rsbs %[c], %[c], #0\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "mov r12, #0\n\t" /* r12 keeps the borrow between passes; no borrow to start */ + "add lr, %[a], #0x80\n\t" /* lr = a + 128 bytes, the loop's end address */ + "\n" + "L_sp_1024_sub_32_word_%=: \n\t" /* %= expands to a number unique to this asm statement, keeping the label from colliding */ + "rsbs r12, r12, #0\n\t" /* 0 - r12: sets C when r12 is 0 and clears it when r12 is 0xFFFFFFFF, reloading the saved borrow into the flags */ + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], r4, r4\n\t" - "cmp %[a], r14\n\t" - "bne 1b\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc r12, r3, r3\n\t" /* r3 - r3 - !C: stashes the borrow as 0 or 0xFFFFFFFF because the cmp below overwrites the flags */ + "cmp %[a], lr\n\t" + "bne L_sp_1024_sub_32_word_%=\n\t" + "mov %[r], r12\n\t" /* the final borrow is passed out through r */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "lr" ); - - return c; + return (uint32_t)(size_t)r; /* r holds the borrow here, no longer a pointer */ } #else @@ -67316,75 +143713,71 @@ static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. 
*/ -static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, - const sp_digit* b) +static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b) /* Unrolled form: r = a - b across 32 words as eight four-word steps, the borrow chained through the carry flag; returns the borrow as 0 or 0xFFFFFFFF. */ { - sp_digit c = 0; - __asm__ __volatile__ ( - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" - "subs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "subs r3, r3, r7\n\t" /* lowest word: plain subs, there is no incoming borrow */ "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, 
r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "ldm %[a]!, {r4, r5, r6, r7}\n\t" - "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "sbcs r7, r7, r11\n\t" - "stm %[r]!, {r4, r5, r6, r7}\n\t" - "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "ldm %[a]!, {r3, r4, r5, r6}\n\t" + "ldm %[b]!, {r7, r8, r9, r10}\n\t" + "sbcs r3, r3, r7\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "stm %[r]!, {r3, r4, r5, r6}\n\t" + "sbc %[r], r6, r6\n\t" /* r6 - r6 - !C: 0 when no borrow, 0xFFFFFFFF otherwise */ + : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); - - return c; + return (uint32_t)(size_t)r; /* r holds the borrow here, no longer a pointer */ } #endif /* WOLFSSL_SP_SMALL */