From 4f75d4eea2bd3d7607134dc1463d06c4f961299b Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Mon, 8 Apr 2019 09:03:50 +1000 Subject: [PATCH] Cortex-M code changed to support IAR compiler --- configure.ac | 4 +- wolfcrypt/src/sp_cortexm.c | 7736 ++++++++++++++++++------------------ 2 files changed, 3880 insertions(+), 3860 deletions(-) diff --git a/configure.ac b/configure.ac index 3d9e8722e..02f6bfa74 100644 --- a/configure.ac +++ b/configure.ac @@ -4039,8 +4039,8 @@ if test "$ENABLED_SP_ASM" = "yes"; then ENABLED_SP_ARM_THUMB_ASM=yes else if test $host_alias = "cortex"; then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_CORTEX_ASM" - AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_CORTEX_ASM" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM -mcpu=cortex-m4" + AM_CCASFLAGS="$AM_CCASFLAGS -DWOLFSSL_SP_ARM_CORTEX_M_ASM" ENABLED_SP_ARM_CORTEX_ASM=yes else AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_SP_ARM32_ASM" diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index ce22cf975..d41c7169c 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -190,500 +190,500 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[8]; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[r]\n\t" "mov %[r], #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 0]\n\t" + /* A[0] * B[0] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r3, r4, r6, r7\n\t" "mov r5, #0\n\t" - "str r3, [%[tmp], 0]\n\t" + "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" - "# A[0] * B[1]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 4]\n\t" + /* A[0] * B[1] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[0]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[0] */ + "ldr r6, [%[a], #4]\n\t" + "ldr 
r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" - "# A[0] * B[2]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 8]\n\t" + /* A[0] * B[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * B[1]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * B[1] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[0]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[0] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" - "# A[0] * B[3]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 12]\n\t" + /* A[0] * B[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * B[2]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * B[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * B[1]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * B[1] */ + "ldr r6, [%[a], #8]\n\t" + 
"ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[0]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[0] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" - "# A[0] * B[4]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 16]\n\t" + /* A[0] * B[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[3]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * B[2]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * B[2] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * B[1]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * B[1] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[0]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[0] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds 
r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" - "# A[0] * B[5]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 20]\n\t" + /* A[0] * B[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * B[4]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * B[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[3]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * B[2]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * B[2] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * B[1]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * B[1] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[0]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[0] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - 
"str r5, [%[tmp], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" - "# A[0] * B[6]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 24]\n\t" + /* A[0] * B[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * B[5]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * B[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * B[4]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * B[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[3]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[3] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * B[2]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * B[2] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[5] * B[1]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[5] * B[1] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[0]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr 
r7, [%[b], 0]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[0] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" - "# A[0] * B[7]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[0] * B[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[6]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * B[5]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * B[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * B[4]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * B[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[3]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[3] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * B[2]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs 
r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * B[2] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[6] * B[1]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[6] * B[1] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[0]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[0] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 28]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" - "# A[1] * B[7]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[1] * B[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[6]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * B[5]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * B[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * B[4]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * 
B[4] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[3]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[3] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[6] * B[2]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[6] * B[2] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[7] * B[1]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[7] * B[1] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 32]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #32]\n\t" "mov r5, #0\n\t" - "# A[2] * B[7]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[2] * B[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[6]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * B[5]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * B[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], 
#20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[5] * B[4]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[5] * B[4] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[3]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[3] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[7] * B[2]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[7] * B[2] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 36]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #36]\n\t" "mov r3, #0\n\t" - "# A[3] * B[7]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[3] * B[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[6]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * B[5]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * B[5] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, 
r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[6] * B[4]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[6] * B[4] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[3]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[3] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 40]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #40]\n\t" "mov r4, #0\n\t" - "# A[4] * B[7]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[4] * B[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[6]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[6] * B[5]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[6] * B[5] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[7] * B[4]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[7] * B[4] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - 
"str r5, [r8, 44]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #44]\n\t" "mov r5, #0\n\t" - "# A[5] * B[7]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[5] * B[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[6]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[6] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[7] * B[5]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[7] * B[5] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 48]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #48]\n\t" "mov r3, #0\n\t" - "# A[6] * B[7]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[6] * B[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[6]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[6] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 52]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #52]\n\t" "mov r4, #0\n\t" - "# A[7] * B[7]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[7] * B[7] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull 
r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "str r5, [r8, 56]\n\t" - "str r3, [r8, 60]\n\t" - "ldr r3, [%[tmp], 0]\n\t" - "ldr r4, [%[tmp], 4]\n\t" - "ldr r5, [%[tmp], 8]\n\t" - "ldr r6, [%[tmp], 12]\n\t" - "str r3, [r8, 0]\n\t" - "str r4, [r8, 4]\n\t" - "str r5, [r8, 8]\n\t" - "str r6, [r8, 12]\n\t" - "ldr r3, [%[tmp], 16]\n\t" - "ldr r4, [%[tmp], 20]\n\t" - "ldr r5, [%[tmp], 24]\n\t" - "ldr r6, [%[tmp], 28]\n\t" - "str r3, [r8, 16]\n\t" - "str r4, [r8, 20]\n\t" - "str r5, [r8, 24]\n\t" - "str r6, [r8, 28]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "str r5, [r8, #56]\n\t" + "str r3, [r8, #60]\n\t" + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [r8, #0]\n\t" + "str r4, [r8, #4]\n\t" + "str r5, [r8, #8]\n\t" + "str r6, [r8, #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [r8, #16]\n\t" + "str r4, [r8, #20]\n\t" + "str r5, [r8, #24]\n\t" + "str r6, [r8, #28]\n\t" "mov %[r], r8\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) @@ -699,380 +699,380 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) { sp_digit tmp[8]; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[r]\n\t" "mov %[r], #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r6, [%[a], 0]\n\t" + /* A[0] * A[0] */ + "ldr r6, [%[a], #0]\n\t" "umull r3, r4, r6, r6\n\t" "mov r5, #0\n\t" - "str r3, [%[tmp], 0]\n\t" + "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" - "# A[0] * A[1]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 4]\n\t" + /* A[0] * A[1] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, 
r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" - "# A[0] * A[2]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 8]\n\t" + /* A[0] * A[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * A[1]\n\t" - "ldr r6, [%[a], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * A[1] */ + "ldr r6, [%[a], #4]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" - "# A[0] * A[3]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 12]\n\t" + /* A[0] * A[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * A[2]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * A[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" - "# A[0] * A[4]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 16]\n\t" + /* A[0] * A[4] */ + "ldr r6, 
[%[a], #0]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * A[3]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * A[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * A[2]\n\t" - "ldr r6, [%[a], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * A[2] */ + "ldr r6, [%[a], #8]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" - "# A[0] * A[5]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 20]\n\t" + /* A[0] * A[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * A[4]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * A[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * A[3]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
%[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * A[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" - "# A[0] * A[6]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 24]\n\t" + /* A[0] * A[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * A[5]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * A[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * A[4]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * A[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * A[3]\n\t" - "ldr r6, [%[a], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * A[3] */ + "ldr r6, [%[a], #12]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, 
r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" - "# A[0] * A[7]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[0] * A[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * A[6]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * A[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * A[5]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * A[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * A[4]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * A[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 28]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, 
%[r]\n\t" + "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" - "# A[1] * A[7]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[1] * A[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * A[6]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * A[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * A[5]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * A[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * A[4]\n\t" - "ldr r6, [%[a], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * A[4] */ + "ldr r6, [%[a], #16]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 32]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #32]\n\t" "mov r5, #0\n\t" - "# A[2] * A[7]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[2] * A[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - 
"adc r5, %[r]\n\t" - "# A[3] * A[6]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * A[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * A[5]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * A[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 36]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #36]\n\t" "mov r3, #0\n\t" - "# A[3] * A[7]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[3] * A[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * A[6]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * A[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * A[5]\n\t" - "ldr r6, [%[a], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * A[5] */ + "ldr r6, [%[a], #20]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 40]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #40]\n\t" "mov r4, #0\n\t" - "# A[4] * A[7]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[4] * A[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * A[6]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * A[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 44]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #44]\n\t" "mov r5, #0\n\t" - "# A[5] * A[7]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[5] * A[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * A[6]\n\t" - "ldr r6, [%[a], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * A[6] */ + "ldr r6, [%[a], #24]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 48]\n\t" + "adds r3, r3, r6\n\t" + "adcs 
r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #48]\n\t" "mov r3, #0\n\t" - "# A[6] * A[7]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[6] * A[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 52]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #52]\n\t" "mov r4, #0\n\t" - "# A[7] * A[7]\n\t" - "ldr r6, [%[a], 28]\n\t" + /* A[7] * A[7] */ + "ldr r6, [%[a], #28]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "str r5, [r8, 56]\n\t" - "str r3, [r8, 60]\n\t" - "ldr r3, [%[tmp], 0]\n\t" - "ldr r4, [%[tmp], 4]\n\t" - "ldr r5, [%[tmp], 8]\n\t" - "ldr r6, [%[tmp], 12]\n\t" - "str r3, [r8, 0]\n\t" - "str r4, [r8, 4]\n\t" - "str r5, [r8, 8]\n\t" - "str r6, [r8, 12]\n\t" - "ldr r3, [%[tmp], 16]\n\t" - "ldr r4, [%[tmp], 20]\n\t" - "ldr r5, [%[tmp], 24]\n\t" - "ldr r6, [%[tmp], 28]\n\t" - "str r3, [r8, 16]\n\t" - "str r4, [r8, 20]\n\t" - "str r5, [r8, 24]\n\t" - "str r6, [r8, 28]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "str r5, [r8, #56]\n\t" + "str r3, [r8, #60]\n\t" + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [r8, #0]\n\t" + "str r4, [r8, #4]\n\t" + "str r5, [r8, #8]\n\t" + "str r6, [r8, #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [r8, #16]\n\t" + "str r4, [r8, #20]\n\t" + "str r5, [r8, #24]\n\t" + "str r6, [r8, #28]\n\t" "mov %[r], r8\n\t" : : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) @@ -1091,41 +1091,41 @@ SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm 
volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5" @@ -1145,72 +1145,72 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "subs r3, r5\n\t" - "sbcs r4, r6\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], 
#20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6" @@ -1230,73 +1230,73 @@ SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" 
"ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5" @@ -1401,136 +1401,136 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "subs r3, r5\n\t" - "sbcs r4, r6\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], 
#4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], 
#76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6" @@ -1550,137 +1550,137 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], 
#0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" 
+ "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" 
(r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5" @@ -1789,269 +1789,269 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "subs r3, r5\n\t" - "sbcs r4, r6\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, 
[%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" - "sbcs r3, 
r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "sub r5, r5, %[c]\n\t" "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" 
"str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #112]\n\t" "str 
r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6" @@ -2071,273 +2071,273 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, #0\n\t" "mvn r7, r7\n\t" "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, 
r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str 
r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "adds %[c], r7\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, 
[%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" 
"ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r7" @@ -2448,24 +2448,24 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" "mov r4, #1\n\t" - "lsl r4, #8\n\t" - "sub r7, #1\n\t" - "add r6, r4\n\t" + "lsl r4, r4, #8\n\t" + "sub r7, r7, #1\n\t" + "add r6, r6, r4\n\t" "\n1:\n\t" - "adds %[c], r7\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r]]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -2487,25 +2487,25 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, %[a]\n\t" "mov r5, #1\n\t" - "lsl r5, #8\n\t" - "add r7, r5\n\t" + "lsl r5, r5, #8\n\t" + "add r7, r7, r5\n\t" "\n1:\n\t" "mov r5, 
#0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" "cmp %[a], r7\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) @@ -2528,7 +2528,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[64 * 2]; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r8, r3\n\t" @@ -2537,36 +2537,36 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "mov r10, %[b]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #252\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" "\n2:\n\t" - "# Multiply Start\n\t" + /* Multiply Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" "cmp %[a], r12\n\t" "beq 3f\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -2575,11 +2575,11 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, "str 
r3, [%[r], r7]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" + "add r6, r6, #248\n\t" "cmp r7, r6\n\t" "ble 1b\n\t" "str r3, [%[r], r7]\n\t" @@ -2600,7 +2600,7 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, */ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) { - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" @@ -2609,56 +2609,56 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" - "add sp, r6\n\t" + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #252\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" - "# Multiply * 2: Start\n\t" + /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply * 2: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" - "# Square: Start\n\t" + /* Square: Start */ "ldr r6, [%[a]]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Square: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, 
r2, #4\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -2668,11 +2668,11 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" + "add r6, r6, #248\n\t" "cmp r7, r6\n\t" "ble 1b\n\t" "mov %[a], r9\n\t" @@ -2681,15 +2681,15 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "mov %[a], r10\n\t" "mov r3, #1\n\t" "lsl r3, r3, #8\n\t" - "add r3, #252\n\t" + "add r3, r3, #252\n\t" "\n4:\n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "subs r3, #4\n\t" + "subs r3, r3, #4\n\t" "bge 4b\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" + "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -2697,8 +2697,8 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ - !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. 
* @@ -2727,22 +2727,22 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" - "add r6, #128\n\t" - "sub r7, #1\n\t" + "add r6, r6, #128\n\t" + "sub r7, r7, #1\n\t" "\n1:\n\t" - "adds %[c], r7\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r]]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -2764,23 +2764,23 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, %[a]\n\t" - "add r7, #128\n\t" + "add r7, r7, #128\n\t" "\n1:\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" "cmp %[a], r7\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) @@ -2803,7 +2803,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[32 * 2]; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r8, r3\n\t" @@ -2811,36 +2811,36 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" "mov r6, #128\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, 
#0\n\t" "mov r6, #124\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" "\n2:\n\t" - "# Multiply Start\n\t" + /* Multiply Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" "cmp %[a], r12\n\t" "beq 3f\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -2849,7 +2849,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, "str r3, [%[r], r7]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #248\n\t" "cmp r7, r6\n\t" @@ -2872,7 +2872,7 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, */ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) { - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" @@ -2881,55 +2881,55 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" - "add sp, r6\n\t" + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #124\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], 
r9\n\t" + "add r2, r2, r9\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" - "# Multiply * 2: Start\n\t" + /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply * 2: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" - "# Square: Start\n\t" + /* Square: Start */ "ldr r6, [%[a]]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Square: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" "mov r6, #128\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -2939,7 +2939,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #248\n\t" "cmp r7, r6\n\t" @@ -2952,11 +2952,11 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "\n4:\n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "subs r3, #4\n\t" + "subs r3, r3, #4\n\t" "bge 4b\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" + "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -2964,7 +2964,8 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ 
+#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && */ + /* !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n. * @@ -2994,30 +2995,30 @@ static void sp_2048_mont_setup(sp_digit* a, sp_digit* rho) SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, const sp_digit b) { - __asm__ __volatile__ ( + asm volatile ( "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, %[a]\n\t" + "add r6, r6, %[a]\n\t" "mov r8, %[r]\n\t" "mov r9, r6\n\t" "mov r3, #0\n\t" "mov r4, #0\n\t" - "1:\n\t" + "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" - "# A[] * B\n\t" + /* A[] * B */ "ldr r6, [%[a]]\n\t" "umull r6, r7, r6, %[b]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[] * B - Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[] * B - Done */ "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "add %[r], %[r], #4\n\t" + "add %[a], %[a], #4\n\t" "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" "blt 1b\n\t" @@ -3028,8 +3029,8 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, ); } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ - !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) /* r = 2^n mod m where n is the number of bits to reduce by. * Given m must be 2048 bits, just need to subtract. 
* @@ -3057,20 +3058,20 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r5, #128\n\t" "mov r8, r5\n\t" "mov r7, #0\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r6, [%[b], r7]\n\t" - "and r6, %[m]\n\t" + "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r5, [%[a], r7]\n\t" - "sbcs r5, r6\n\t" - "sbcs %[c], %[c]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "cmp r7, r8\n\t" "blt 1b\n\t" : [c] "+r" (c) @@ -3092,73 +3093,73 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, sp_digit* m, { sp_digit ca = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" "mov r14, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - "# i = 0\n\t" + /* i = 0 */ "mov r11, r4\n\t" "\n1:\n\t" "mov r5, #0\n\t" "mov %[ca], #0\n\t" - "# mu = a[i] * mp\n\t" + /* mu = a[i] * mp */ "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" - "mul %[mp], %[a]\n\t" + "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r14\n\t" "mov r10, r9\n\t" "\n2:\n\t" - "# a[i+j] += m[j] * mu\n\t" + /* a[i+j] += m[j] * mu */ "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" "mov %[ca], #0\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "# Multiply m[j] and mu - Start\n\t" + /* Multiply m[j] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds %[a], r6\n\t" - "adcs r5, r7\n\t" - "# Multiply m[j] and mu - Done\n\t" - "adds r4, %[a]\n\t" - "adc r5, %[ca]\n\t" + "adds %[a], %[a], r6\n\t" + "adcs r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, %[ca]\n\t" "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" + "add %[m], %[m], #4\n\t" + "add r10, r10, r6\n\t" "mov r4, #124\n\t" - "add r4, r9\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" "blt 2b\n\t" - "# a[i+31] += m[31] * mu\n\t" + /* a[i+31] += m[31] * 
mu */ "mov %[ca], #0\n\t" "mov r4, r12\n\t" "mov %[a], #0\n\t" - "# Multiply m[31] and mu - Start\n\t" + /* Multiply m[31] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds r5, r6\n\t" - "adcs r4, r7\n\t" - "adc %[a], %[ca]\n\t" - "# Multiply m[31] and mu - Done\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc %[a], %[a], %[ca]\n\t" + /* Multiply m[31] and mu - Done */ "mov %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" "mov r6, #0\n\t" - "adds r5, %[a]\n\t" - "adcs r7, r4\n\t" - "adc %[ca], r6\n\t" + "adds r5, r5, %[a]\n\t" + "adcs r7, r7, r4\n\t" + "adc %[ca], %[ca], r6\n\t" "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" - "# i += 1\n\t" + /* i += 1 */ "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" "mov r4, #128\n\t" @@ -3212,29 +3213,29 @@ static void sp_2048_mont_sqr_32(sp_digit* r, sp_digit* a, sp_digit* m, SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, const sp_digit b) { - __asm__ __volatile__ ( + asm volatile ( "mov r6, #128\n\t" - "add r6, %[a]\n\t" + "add r6, r6, %[a]\n\t" "mov r8, %[r]\n\t" "mov r9, r6\n\t" "mov r3, #0\n\t" "mov r4, #0\n\t" - "1:\n\t" + "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" - "# A[] * B\n\t" + /* A[] * B */ "ldr r6, [%[a]]\n\t" "umull r6, r7, r6, %[b]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[] * B - Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[] * B - Done */ "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "add %[r], %[r], #4\n\t" + "add %[a], %[a], #4\n\t" "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" "blt 1b\n\t" @@ -3259,37 +3260,37 @@ SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, { sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r6, %[div], 
16\n\t\n\t" - "add r6, r6, 1\n\t\n\t" - "udiv r4, %[d1], r6\n\t\n\t" - "lsl r7, r4, 16\n\t\n\t" - "umull r4, r5, %[div], r7\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r5, %[d1], r6\n\t\n\t" - "lsl r4, r5, 16\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r4, %[d0], %[div]\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "mov %[r], r7\n\t\n\t" + asm volatile ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r7, r4, #16\n\t" + "umull r4, r5, %[div], r7\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r7, r7, r4\n\t" + "mov %[r], r7\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) : "r4", "r5", "r6", "r7" @@ -3309,27 +3310,27 @@ SP_NOINLINE static int32_t sp_2048_cmp_32(sp_digit* a, sp_digit* b) sp_digit r = 0; - __asm__ 
__volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mvn r3, r3\n\t" "mov r6, #124\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r3\n\t" - "and r5, r3\n\t" + "and r7, r7, r3\n\t" + "and r5, r5, r3\n\t" "mov r4, r7\n\t" - "subs r7, r5\n\t" - "sbc r7, r7\n\t" - "add %[r], r7\n\t" + "subs r7, r7, r5\n\t" + "sbc r7, r7, r7\n\t" + "add %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "subs r5, r4\n\t" - "sbc r7, r7\n\t" - "sub %[r], r7\n\t" + "and r3, r3, r7\n\t" + "subs r5, r5, r4\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "sub r6, #4\n\t" + "and r3, r3, r7\n\t" + "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) @@ -3663,7 +3664,8 @@ static int sp_2048_mod_exp_32(sp_digit* r, sp_digit* a, sp_digit* e, } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && */ + /* !WOLFSSL_RSA_PUBLIC_ONLY */ #ifdef WOLFSSL_HAVE_SP_DH /* r = 2^n mod m where n is the number of bits to reduce by. 
@@ -3694,21 +3696,21 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r5, #1\n\t" "lsl r5, r5, #8\n\t" "mov r8, r5\n\t" "mov r7, #0\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r6, [%[b], r7]\n\t" - "and r6, %[m]\n\t" + "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r5, [%[a], r7]\n\t" - "sbcs r5, r6\n\t" - "sbcs %[c], %[c]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "cmp r7, r8\n\t" "blt 1b\n\t" : [c] "+r" (c) @@ -3730,73 +3732,73 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, sp_digit* m, { sp_digit ca = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" "mov r14, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - "# i = 0\n\t" + /* i = 0 */ "mov r11, r4\n\t" "\n1:\n\t" "mov r5, #0\n\t" "mov %[ca], #0\n\t" - "# mu = a[i] * mp\n\t" + /* mu = a[i] * mp */ "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" - "mul %[mp], %[a]\n\t" + "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r14\n\t" "mov r10, r9\n\t" "\n2:\n\t" - "# a[i+j] += m[j] * mu\n\t" + /* a[i+j] += m[j] * mu */ "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" "mov %[ca], #0\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "# Multiply m[j] and mu - Start\n\t" + /* Multiply m[j] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds %[a], r6\n\t" - "adcs r5, r7\n\t" - "# Multiply m[j] and mu - Done\n\t" - "adds r4, %[a]\n\t" - "adc r5, %[ca]\n\t" + "adds %[a], %[a], r6\n\t" + "adcs r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, %[ca]\n\t" "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" + "add %[m], %[m], #4\n\t" + "add r10, r10, r6\n\t" "mov r4, #252\n\t" - "add r4, r9\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" "blt 2b\n\t" - "# a[i+63] += m[63] * mu\n\t" + /* 
a[i+63] += m[63] * mu */ "mov %[ca], #0\n\t" "mov r4, r12\n\t" "mov %[a], #0\n\t" - "# Multiply m[63] and mu - Start\n\t" + /* Multiply m[63] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds r5, r6\n\t" - "adcs r4, r7\n\t" - "adc %[a], %[ca]\n\t" - "# Multiply m[63] and mu - Done\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc %[a], %[a], %[ca]\n\t" + /* Multiply m[63] and mu - Done */ "mov %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" "mov r6, #0\n\t" - "adds r5, %[a]\n\t" - "adcs r7, r4\n\t" - "adc %[ca], r6\n\t" + "adds r5, r5, %[a]\n\t" + "adcs r7, r7, r4\n\t" + "adc %[ca], %[ca], r6\n\t" "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" - "# i += 1\n\t" + /* i += 1 */ "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" "mov r4, #1\n\t" @@ -3856,37 +3858,37 @@ SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, { sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r6, %[div], 16\n\t\n\t" - "add r6, r6, 1\n\t\n\t" - "udiv r4, %[d1], r6\n\t\n\t" - "lsl r7, r4, 16\n\t\n\t" - "umull r4, r5, %[div], r7\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r5, %[d1], r6\n\t\n\t" - "lsl r4, r5, 16\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r4, %[d0], %[div]\n\t\n\t" - "add r7, r7, r4\n\t\n\t" 
- "mov %[r], r7\n\t\n\t" + asm volatile ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r7, r4, #16\n\t" + "umull r4, r5, %[div], r7\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr #16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r7, r7, r4\n\t" + "mov %[r], r7\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) : "r4", "r5", "r6", "r7" @@ -3935,27 +3937,27 @@ SP_NOINLINE static int32_t sp_2048_cmp_64(sp_digit* a, sp_digit* b) sp_digit r = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mvn r3, r3\n\t" "mov r6, #252\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r3\n\t" - "and r5, r3\n\t" + "and r7, r7, r3\n\t" + "and r5, r5, r3\n\t" "mov r4, r7\n\t" - "subs r7, r5\n\t" - "sbc r7, r7\n\t" - "add %[r], r7\n\t" + "subs r7, r7, r5\n\t" + "sbc r7, r7, r7\n\t" + "add %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "subs r5, r4\n\t" - "sbc r7, r7\n\t" - "sub %[r], r7\n\t" + "and r3, r3, r7\n\t" + "subs r5, r5, r4\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "sub r6, #4\n\t" + "and r3, r3, r7\n\t" + "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) @@ -4693,6 +4695,7 @@ int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) return err; } +#ifdef WOLFSSL_HAVE_SP_DH /* Perform the modular exponentiation for 
Diffie-Hellman. * * base Base. @@ -4740,6 +4743,7 @@ int sp_DhExp_2048(mp_int* base, const byte* exp, word32 expLen, return err; } +#endif /* WOLFSSL_HAVE_SP_DH */ /* Perform the modular exponentiation for Diffie-Hellman. * @@ -4926,500 +4930,500 @@ SP_NOINLINE static void sp_3072_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[8]; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[r]\n\t" "mov %[r], #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 0]\n\t" + /* A[0] * B[0] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r3, r4, r6, r7\n\t" "mov r5, #0\n\t" - "str r3, [%[tmp], 0]\n\t" + "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" - "# A[0] * B[1]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 4]\n\t" + /* A[0] * B[1] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[0]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[0] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" - "# A[0] * B[2]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 8]\n\t" + /* A[0] * B[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * B[1]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * B[1] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[0]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, 
[%[b], 0]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[0] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" - "# A[0] * B[3]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 12]\n\t" + /* A[0] * B[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * B[2]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * B[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * B[1]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * B[1] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[0]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[0] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" - "# A[0] * B[4]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 16]\n\t" + /* A[0] * B[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[3]\n\t" - "ldr r6, [%[a], 4]\n\t" 
- "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * B[2]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * B[2] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * B[1]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * B[1] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[0]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[0] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" - "# A[0] * B[5]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 20]\n\t" + /* A[0] * B[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * B[4]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * B[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[3]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs 
r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * B[2]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * B[2] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * B[1]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * B[1] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[0]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[0] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" - "# A[0] * B[6]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 24]\n\t" + /* A[0] * B[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * B[5]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * B[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * B[4]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * B[4] 
*/ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[3]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[3] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * B[2]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * B[2] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[5] * B[1]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[5] * B[1] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[0]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[0] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" - "# A[0] * B[7]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[0] * B[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[6]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], 
#24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * B[5]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * B[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * B[4]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * B[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[3]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[3] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * B[2]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * B[2] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[6] * B[1]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[6] * B[1] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[0]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[0] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 28]\n\t" + 
"adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" - "# A[1] * B[7]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[1] * B[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[6]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * B[5]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * B[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * B[4]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * B[4] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[3]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[3] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[6] * B[2]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[6] * B[2] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[7] * B[1]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds 
r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[7] * B[1] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 32]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #32]\n\t" "mov r5, #0\n\t" - "# A[2] * B[7]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[2] * B[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[6]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * B[5]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * B[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[5] * B[4]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[5] * B[4] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[3]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[3] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[7] * B[2]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
%[r]\n\t" + /* A[7] * B[2] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 36]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #36]\n\t" "mov r3, #0\n\t" - "# A[3] * B[7]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[3] * B[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[6]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * B[5]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * B[5] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[6] * B[4]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[6] * B[4] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[3]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[3] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 40]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #40]\n\t" "mov r4, #0\n\t" - "# A[4] * B[7]\n\t" - "ldr r6, [%[a], 16]\n\t" - 
"ldr r7, [%[b], 28]\n\t" + /* A[4] * B[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[6]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[6] * B[5]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[6] * B[5] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[7] * B[4]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[7] * B[4] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 44]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #44]\n\t" "mov r5, #0\n\t" - "# A[5] * B[7]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[5] * B[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[6]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[6] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[7] * B[5]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[7] * B[5] */ + "ldr 
r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 48]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #48]\n\t" "mov r3, #0\n\t" - "# A[6] * B[7]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[6] * B[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[6]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[6] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 52]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #52]\n\t" "mov r4, #0\n\t" - "# A[7] * B[7]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[7] * B[7] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "str r5, [r8, 56]\n\t" - "str r3, [r8, 60]\n\t" - "ldr r3, [%[tmp], 0]\n\t" - "ldr r4, [%[tmp], 4]\n\t" - "ldr r5, [%[tmp], 8]\n\t" - "ldr r6, [%[tmp], 12]\n\t" - "str r3, [r8, 0]\n\t" - "str r4, [r8, 4]\n\t" - "str r5, [r8, 8]\n\t" - "str r6, [r8, 12]\n\t" - "ldr r3, [%[tmp], 16]\n\t" - "ldr r4, [%[tmp], 20]\n\t" - "ldr r5, [%[tmp], 24]\n\t" - "ldr r6, [%[tmp], 28]\n\t" - "str r3, [r8, 16]\n\t" - "str r4, [r8, 20]\n\t" - "str r5, [r8, 24]\n\t" - "str r6, [r8, 28]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "str r5, [r8, #56]\n\t" + "str r3, [r8, #60]\n\t" + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [r8, #0]\n\t" + "str r4, [r8, #4]\n\t" + "str r5, [r8, #8]\n\t" + "str r6, [r8, #12]\n\t" + "ldr r3, 
[%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [r8, #16]\n\t" + "str r4, [r8, #20]\n\t" + "str r5, [r8, #24]\n\t" + "str r6, [r8, #28]\n\t" "mov %[r], r8\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) @@ -5435,380 +5439,380 @@ SP_NOINLINE static void sp_3072_mul_8(sp_digit* r, const sp_digit* a, SP_NOINLINE static void sp_3072_sqr_8(sp_digit* r, const sp_digit* a) { sp_digit tmp[8]; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[r]\n\t" "mov %[r], #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r6, [%[a], 0]\n\t" + /* A[0] * A[0] */ + "ldr r6, [%[a], #0]\n\t" "umull r3, r4, r6, r6\n\t" "mov r5, #0\n\t" - "str r3, [%[tmp], 0]\n\t" + "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" - "# A[0] * A[1]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 4]\n\t" + /* A[0] * A[1] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" - "# A[0] * A[2]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 8]\n\t" + /* A[0] * A[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * A[1]\n\t" - "ldr r6, [%[a], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * A[1] */ + "ldr r6, [%[a], #4]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, 
r4, %[r]\n\t" + "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" - "# A[0] * A[3]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 12]\n\t" + /* A[0] * A[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * A[2]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * A[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" - "# A[0] * A[4]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 16]\n\t" + /* A[0] * A[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * A[3]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * A[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * A[2]\n\t" - "ldr r6, [%[a], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * A[2] */ + "ldr r6, [%[a], 
#8]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" - "# A[0] * A[5]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 20]\n\t" + /* A[0] * A[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * A[4]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * A[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * A[3]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * A[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" - "# A[0] * A[6]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 24]\n\t" + /* A[0] * A[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * A[5]\n\t" - "ldr r6, [%[a], 
4]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * A[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * A[4]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * A[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * A[3]\n\t" - "ldr r6, [%[a], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * A[3] */ + "ldr r6, [%[a], #12]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" - "# A[0] * A[7]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[0] * A[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * A[6]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * A[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc 
r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * A[5]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * A[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * A[4]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * A[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 28]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" - "# A[1] * A[7]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[1] * A[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * A[6]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * A[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * A[5]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 
20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * A[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * A[4]\n\t" - "ldr r6, [%[a], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * A[4] */ + "ldr r6, [%[a], #16]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 32]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #32]\n\t" "mov r5, #0\n\t" - "# A[2] * A[7]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[2] * A[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * A[6]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * A[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * A[5]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * A[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, 
r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 36]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #36]\n\t" "mov r3, #0\n\t" - "# A[3] * A[7]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[3] * A[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * A[6]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * A[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * A[5]\n\t" - "ldr r6, [%[a], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * A[5] */ + "ldr r6, [%[a], #20]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 40]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #40]\n\t" "mov r4, #0\n\t" - "# A[4] * A[7]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[4] * A[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * A[6]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, 
%[r]\n\t" + /* A[5] * A[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 44]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #44]\n\t" "mov r5, #0\n\t" - "# A[5] * A[7]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[5] * A[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * A[6]\n\t" - "ldr r6, [%[a], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * A[6] */ + "ldr r6, [%[a], #24]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 48]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #48]\n\t" "mov r3, #0\n\t" - "# A[6] * A[7]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[6] * A[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 52]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #52]\n\t" "mov r4, #0\n\t" - "# A[7] * A[7]\n\t" - "ldr r6, [%[a], 28]\n\t" + /* A[7] * A[7] */ + "ldr r6, [%[a], #28]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "str r5, [r8, 56]\n\t" - "str r3, [r8, 60]\n\t" - "ldr r3, [%[tmp], 0]\n\t" - "ldr r4, [%[tmp], 
4]\n\t" - "ldr r5, [%[tmp], 8]\n\t" - "ldr r6, [%[tmp], 12]\n\t" - "str r3, [r8, 0]\n\t" - "str r4, [r8, 4]\n\t" - "str r5, [r8, 8]\n\t" - "str r6, [r8, 12]\n\t" - "ldr r3, [%[tmp], 16]\n\t" - "ldr r4, [%[tmp], 20]\n\t" - "ldr r5, [%[tmp], 24]\n\t" - "ldr r6, [%[tmp], 28]\n\t" - "str r3, [r8, 16]\n\t" - "str r4, [r8, 20]\n\t" - "str r5, [r8, 24]\n\t" - "str r6, [r8, 28]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "str r5, [r8, #56]\n\t" + "str r3, [r8, #60]\n\t" + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [r8, #0]\n\t" + "str r4, [r8, #4]\n\t" + "str r5, [r8, #8]\n\t" + "str r6, [r8, #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [r8, #16]\n\t" + "str r4, [r8, #20]\n\t" + "str r5, [r8, #24]\n\t" + "str r6, [r8, #28]\n\t" "mov %[r], r8\n\t" : : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) @@ -5827,41 +5831,41 @@ SP_NOINLINE static sp_digit sp_3072_add_8(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, 
[%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5" @@ -5881,72 +5885,72 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_16(sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "subs r3, r5\n\t" - "sbcs r4, r6\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - 
"sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6" @@ -5966,73 +5970,73 @@ SP_NOINLINE static sp_digit sp_3072_add_16(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, 
[%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5" @@ -6137,136 +6141,136 @@ SP_NOINLINE static sp_digit sp_3072_sub_32(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[a], #4]\n\t" "ldr r6, [%[b], #0]\n\t" "ldr r7, [%[b], #4]\n\t" - "subs r4, r6\n\t" - "sbcs r5, r7\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #0]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" "ldr r7, [%[b], #12]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" "ldr r7, [%[b], #20]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" "ldr r7, [%[b], #28]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[a], #36]\n\t" "ldr r6, [%[b], #32]\n\t" "ldr r7, [%[b], #36]\n\t" - "sbcs r4, 
r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #32]\n\t" "str r5, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[a], #44]\n\t" "ldr r6, [%[b], #40]\n\t" "ldr r7, [%[b], #44]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #40]\n\t" "str r5, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[a], #52]\n\t" "ldr r6, [%[b], #48]\n\t" "ldr r7, [%[b], #52]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #48]\n\t" "str r5, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[a], #60]\n\t" "ldr r6, [%[b], #56]\n\t" "ldr r7, [%[b], #60]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #56]\n\t" "str r5, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[a], #68]\n\t" "ldr r6, [%[b], #64]\n\t" "ldr r7, [%[b], #68]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #64]\n\t" "str r5, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[a], #76]\n\t" "ldr r6, [%[b], #72]\n\t" "ldr r7, [%[b], #76]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #72]\n\t" "str r5, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[a], #84]\n\t" "ldr r6, [%[b], #80]\n\t" "ldr r7, [%[b], #84]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #80]\n\t" "str r5, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[a], #92]\n\t" "ldr r6, [%[b], #88]\n\t" "ldr r7, [%[b], #92]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #88]\n\t" "str r5, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[a], #100]\n\t" "ldr r6, [%[b], #96]\n\t" "ldr r7, [%[b], #100]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, 
r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #96]\n\t" "str r5, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[a], #108]\n\t" "ldr r6, [%[b], #104]\n\t" "ldr r7, [%[b], #108]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #104]\n\t" "str r5, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[a], #116]\n\t" "ldr r6, [%[b], #112]\n\t" "ldr r7, [%[b], #116]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #112]\n\t" "str r5, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[a], #124]\n\t" "ldr r6, [%[b], #120]\n\t" "ldr r7, [%[b], #124]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #120]\n\t" "str r5, [%[r], #124]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r7" @@ -6286,137 +6290,137 @@ SP_NOINLINE static sp_digit sp_3072_add_32(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, 
[%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - 
"adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5" @@ -6528,209 +6532,209 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, #0\n\t" "mvn r7, r7\n\t" "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, 
[%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr 
r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "adds %[c], r7\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" 
"ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r7" @@ -6750,402 +6754,402 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "subs r3, r5\n\t" - "sbcs 
r4, r6\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, 
r6\n\t" "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "sub r5, 
r5, %[c]\n\t" "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], 
#68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], 
#0x80\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "sub r5, r5, %[c]\n\t" "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, 
r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, 
r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6" @@ -7165,407 +7169,407 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, #0\n\t" "mvn r7, r7\n\t" "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, 
[%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], 
#116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "adds %[c], r7\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" 
- "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, [%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, 
r5\n\t" "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" - "add %[r], #0x80\n\t" - "adds %[c], r7\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" + "add %[r], %[r], #0x80\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "ldr r5, [%[b], #32]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #36]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, 
r5\n\t" "str r4, [%[r], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "ldr r5, [%[b], #40]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #44]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "ldr r5, [%[b], #48]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #52]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "ldr r5, [%[b], #56]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #60]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "ldr r5, [%[b], #64]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #68]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #68]\n\t" "ldr r4, [%[a], #72]\n\t" "ldr r5, [%[b], #72]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #76]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #76]\n\t" "ldr r4, [%[a], #80]\n\t" "ldr r5, [%[b], #80]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #84]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #84]\n\t" "ldr r4, [%[a], #88]\n\t" "ldr r5, [%[b], #88]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #92]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #92]\n\t" "ldr r4, [%[a], #96]\n\t" "ldr r5, [%[b], #96]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #100]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #100]\n\t" "ldr r4, 
[%[a], #104]\n\t" "ldr r5, [%[b], #104]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #108]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #108]\n\t" "ldr r4, [%[a], #112]\n\t" "ldr r5, [%[b], #112]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #116]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #116]\n\t" "ldr r4, [%[a], #120]\n\t" "ldr r5, [%[b], #120]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #124]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #124]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r7" @@ -7676,25 +7680,25 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" "mov r4, #1\n\t" - "lsl r4, #8\n\t" - "add r4, #128\n\t" - "sub r7, #1\n\t" - "add r6, r4\n\t" + "lsl r4, r4, #8\n\t" + "add r4, r4, #128\n\t" + "sub r7, r7, #1\n\t" + "add r6, r6, r4\n\t" "\n1:\n\t" - "adds %[c], r7\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r]]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -7716,26 +7720,26 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, %[a]\n\t" "mov r5, #1\n\t" - "lsl r5, #8\n\t" - "add r5, #128\n\t" 
- "add r7, r5\n\t" + "lsl r5, r5, #8\n\t" + "add r5, r5, #128\n\t" + "add r7, r7, r5\n\t" "\n1:\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" "cmp %[a], r7\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) @@ -7758,7 +7762,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[96 * 2]; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r8, r3\n\t" @@ -7767,39 +7771,39 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "mov r10, %[b]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add r6, r9\n\t" + "add r6, r6, #128\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #124\n\t" + "add r6, r6, #124\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" "\n2:\n\t" - "# Multiply Start\n\t" + /* Multiply Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], %[b], #4\n\t" "cmp %[a], 
r12\n\t" "beq 3f\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -7808,11 +7812,11 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, "str r3, [%[r], r7]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" + "add r6, r6, #248\n\t" "cmp r7, r6\n\t" "ble 1b\n\t" "str r3, [%[r], r7]\n\t" @@ -7833,7 +7837,7 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, */ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) { - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" @@ -7842,59 +7846,59 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "mov r6, #3\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" - "add sp, r6\n\t" + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #124\n\t" + "add r6, r6, #124\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" - "# Multiply * 2: Start\n\t" + /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply * 2: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" - "# Square: Start\n\t" + /* Square: Start */ "ldr r6, [%[a]]\n\t" "umull 
r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Square: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add r6, r9\n\t" + "add r6, r6, #128\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -7904,11 +7908,11 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" - "add r6, #248\n\t" + "add r6, r6, #248\n\t" "cmp r7, r6\n\t" "ble 1b\n\t" "mov %[a], r9\n\t" @@ -7917,15 +7921,15 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "mov %[a], r10\n\t" "mov r3, #2\n\t" "lsl r3, r3, #8\n\t" - "add r3, #252\n\t" + "add r3, r3, #252\n\t" "\n4:\n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "subs r3, #4\n\t" + "subs r3, r3, #4\n\t" "bge 4b\n\t" "mov r6, #3\n\t" "lsl r6, r6, #8\n\t" - "add sp, r6\n\t" + "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -7933,8 +7937,8 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ - !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* AND m into each word of a and store in r. 
* @@ -7963,22 +7967,22 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" - "add r6, #192\n\t" - "sub r7, #1\n\t" + "add r6, r6, #192\n\t" + "sub r7, r7, #1\n\t" "\n1:\n\t" - "adds %[c], r7\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r]]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -8001,7 +8005,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[48 * 2]; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r8, r3\n\t" @@ -8009,36 +8013,36 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "mov r9, %[a]\n\t" "mov r10, %[b]\n\t" "mov r6, #192\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "mov r12, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #188\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov %[b], r8\n\t" - "sub %[b], %[a]\n\t" - "add %[a], r9\n\t" - "add %[b], r10\n\t" + "sub %[b], %[b], %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add %[b], %[b], r10\n\t" "\n2:\n\t" - "# Multiply Start\n\t" + /* Multiply Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [%[b]]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply Done\n\t" - "add %[a], #4\n\t" - "sub %[b], #4\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply Done */ + "add %[a], %[a], #4\n\t" + "sub %[b], 
%[b], #4\n\t" "cmp %[a], r12\n\t" "beq 3f\n\t" "mov r6, r8\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -8047,11 +8051,11 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, "str r3, [%[r], r7]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #120\n\t" + "add r6, r6, #120\n\t" "cmp r7, r6\n\t" "ble 1b\n\t" "str r3, [%[r], r7]\n\t" @@ -8072,7 +8076,7 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, */ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) { - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" @@ -8080,57 +8084,57 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "mov r11, %[r]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" + "add r6, r6, #128\n\t" "neg r6, r6\n\t" - "add sp, r6\n\t" + "add sp, sp, r6\n\t" "mov r10, sp\n\t" "mov r9, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #188\n\t" "mov %[a], r8\n\t" - "subs %[a], r6\n\t" - "sbc r6, r6\n\t" + "subs %[a], %[a], r6\n\t" + "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" - "and %[a], r6\n\t" + "and %[a], %[a], r6\n\t" "mov r2, r8\n\t" - "sub r2, %[a]\n\t" - "add %[a], r9\n\t" - "add r2, r9\n\t" + "sub r2, r2, %[a]\n\t" + "add %[a], %[a], r9\n\t" + "add r2, r2, r9\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" - "# Multiply * 2: Start\n\t" + /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" "ldr r7, [r2]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Multiply * 2: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" - "# Square: Start\n\t" + /* Square: Start */
"ldr r6, [%[a]]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# Square: Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* Square: Done */ "\n5:\n\t" - "add %[a], #4\n\t" - "sub r2, #4\n\t" + "add %[a], %[a], #4\n\t" + "sub r2, r2, #4\n\t" "mov r6, #192\n\t" - "add r6, r9\n\t" + "add r6, r6, r9\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" "mov r7, r8\n\t" - "add r7, r9\n\t" + "add r7, r7, r9\n\t" "cmp %[a], r7\n\t" "ble 2b\n\t" "\n3:\n\t" @@ -8140,11 +8144,11 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "mov r8, r7\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #120\n\t" + "add r6, r6, #120\n\t" "cmp r7, r6\n\t" "ble 1b\n\t" "mov %[a], r9\n\t" @@ -8153,16 +8157,16 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "mov %[a], r10\n\t" "mov r3, #1\n\t" "lsl r3, r3, #8\n\t" - "add r3, #124\n\t" + "add r3, r3, #124\n\t" "\n4:\n\t" "ldr r6, [%[a], r3]\n\t" "str r6, [%[r], r3]\n\t" - "subs r3, #4\n\t" + "subs r3, r3, #4\n\t" "bge 4b\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add sp, r6\n\t" + "add r6, r6, #128\n\t" + "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" @@ -8170,7 +8174,8 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) } #endif /* WOLFSSL_SP_SMALL */ -#endif /* !SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && */ + /* !WOLFSSL_RSA_PUBLIC_ONLY */ /* Caclulate the bottom digit of -1/a mod 2^n.
* @@ -8200,31 +8205,31 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, const sp_digit b) { - __asm__ __volatile__ ( + asm volatile ( "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #128\n\t" - "add r6, %[a]\n\t" + "add r6, r6, #128\n\t" + "add r6, r6, %[a]\n\t" "mov r8, %[r]\n\t" "mov r9, r6\n\t" "mov r3, #0\n\t" "mov r4, #0\n\t" - "1:\n\t" + "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" - "# A[] * B\n\t" + /* A[] * B */ "ldr r6, [%[a]]\n\t" "umull r6, r7, r6, %[b]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[] * B - Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[] * B - Done */ "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "add %[r], %[r], #4\n\t" + "add %[a], %[a], #4\n\t" "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" "blt 1b\n\t" @@ -8235,8 +8240,8 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, ); } -#if !defined(SP_RSA_PRIVATE_EXP_D) && defined(WOLFSSL_HAVE_SP_RSA) && \ - !defined(WOLFSSL_RSA_PUBLIC_ONLY) +#if (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY) #ifdef WOLFSSL_SP_SMALL /* Sub b from a into a. 
(a -= b) * @@ -8247,23 +8252,23 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, %[a]\n\t" - "add r7, #192\n\t" + "add r7, r7, #192\n\t" "\n1:\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" "cmp %[a], r7\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) @@ -8286,205 +8291,205 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "subs r3, r5\n\t" - "sbcs r4, r6\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs 
r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" "ldr r3, [%[a], #64]\n\t" "ldr r4, [%[a], #68]\n\t" "ldr r5, [%[b], #64]\n\t" "ldr r6, [%[b], #68]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #64]\n\t" "str r4, [%[a], #68]\n\t" "ldr r3, [%[a], #72]\n\t" "ldr r4, [%[a], #76]\n\t" "ldr r5, [%[b], #72]\n\t" "ldr r6, [%[b], #76]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #72]\n\t" "str r4, [%[a], #76]\n\t" "ldr r3, [%[a], #80]\n\t" "ldr r4, [%[a], #84]\n\t" "ldr r5, [%[b], #80]\n\t" "ldr r6, [%[b], #84]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #80]\n\t" "str r4, [%[a], #84]\n\t" "ldr r3, [%[a], #88]\n\t" "ldr r4, [%[a], #92]\n\t" "ldr r5, [%[b], #88]\n\t" "ldr r6, [%[b], #92]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #88]\n\t" "str r4, [%[a], #92]\n\t" "ldr r3, [%[a], #96]\n\t" "ldr r4, [%[a], #100]\n\t" "ldr r5, [%[b], #96]\n\t" "ldr r6, [%[b], #100]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, 
r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #96]\n\t" "str r4, [%[a], #100]\n\t" "ldr r3, [%[a], #104]\n\t" "ldr r4, [%[a], #108]\n\t" "ldr r5, [%[b], #104]\n\t" "ldr r6, [%[b], #108]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #104]\n\t" "str r4, [%[a], #108]\n\t" "ldr r3, [%[a], #112]\n\t" "ldr r4, [%[a], #116]\n\t" "ldr r5, [%[b], #112]\n\t" "ldr r6, [%[b], #116]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #112]\n\t" "str r4, [%[a], #116]\n\t" "ldr r3, [%[a], #120]\n\t" "ldr r4, [%[a], #124]\n\t" "ldr r5, [%[b], #120]\n\t" "ldr r6, [%[b], #124]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #120]\n\t" "str r4, [%[a], #124]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #0x80\n\t" - "add %[b], #0x80\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #0x80\n\t" + "add %[b], %[b], #0x80\n\t" "mov r5, #0\n\t" - "sub r5, %[c]\n\t" + "sub r5, r5, %[c]\n\t" "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], 
#24]\n\t" "str r4, [%[a], #28]\n\t" "ldr r3, [%[a], #32]\n\t" "ldr r4, [%[a], #36]\n\t" "ldr r5, [%[b], #32]\n\t" "ldr r6, [%[b], #36]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #32]\n\t" "str r4, [%[a], #36]\n\t" "ldr r3, [%[a], #40]\n\t" "ldr r4, [%[a], #44]\n\t" "ldr r5, [%[b], #40]\n\t" "ldr r6, [%[b], #44]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #40]\n\t" "str r4, [%[a], #44]\n\t" "ldr r3, [%[a], #48]\n\t" "ldr r4, [%[a], #52]\n\t" "ldr r5, [%[b], #48]\n\t" "ldr r6, [%[b], #52]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #48]\n\t" "str r4, [%[a], #52]\n\t" "ldr r3, [%[a], #56]\n\t" "ldr r4, [%[a], #60]\n\t" "ldr r5, [%[b], #56]\n\t" "ldr r6, [%[b], #60]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #56]\n\t" "str r4, [%[a], #60]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6" @@ -8521,20 +8526,20 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r5, #192\n\t" "mov r8, r5\n\t" "mov r7, #0\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r6, [%[b], r7]\n\t" - "and r6, %[m]\n\t" + "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r5, [%[a], r7]\n\t" - "sbcs r5, r6\n\t" - "sbcs %[c], %[c]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "cmp r7, r8\n\t" "blt 1b\n\t" : [c] "+r" (c) @@ -8556,73 +8561,73 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, sp_digit* m, { sp_digit ca = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" "mov r14, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, 
#0\n\t" - "# i = 0\n\t" + /* i = 0 */ "mov r11, r4\n\t" "\n1:\n\t" "mov r5, #0\n\t" "mov %[ca], #0\n\t" - "# mu = a[i] * mp\n\t" + /* mu = a[i] * mp */ "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" - "mul %[mp], %[a]\n\t" + "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r14\n\t" "mov r10, r9\n\t" "\n2:\n\t" - "# a[i+j] += m[j] * mu\n\t" + /* a[i+j] += m[j] * mu */ "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" "mov %[ca], #0\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "# Multiply m[j] and mu - Start\n\t" + /* Multiply m[j] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds %[a], r6\n\t" - "adcs r5, r7\n\t" - "# Multiply m[j] and mu - Done\n\t" - "adds r4, %[a]\n\t" - "adc r5, %[ca]\n\t" + "adds %[a], %[a], r6\n\t" + "adcs r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + "adc r5, r5, %[ca]\n\t" "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" + "add %[m], %[m], #4\n\t" + "add r10, r10, r6\n\t" "mov r4, #188\n\t" - "add r4, r9\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" "blt 2b\n\t" - "# a[i+47] += m[47] * mu\n\t" + /* a[i+47] += m[47] * mu */ "mov %[ca], #0\n\t" "mov r4, r12\n\t" "mov %[a], #0\n\t" - "# Multiply m[47] and mu - Start\n\t" + /* Multiply m[47] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds r5, r6\n\t" - "adcs r4, r7\n\t" - "adc %[a], %[ca]\n\t" - "# Multiply m[47] and mu - Done\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc %[a], %[a], %[ca]\n\t" + /* Multiply m[47] and mu - Done */ "mov %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" "mov r6, #0\n\t" - "adds r5, %[a]\n\t" - "adcs r7, r4\n\t" - "adc %[ca], r6\n\t" + "adds r5, r5, %[a]\n\t" + "adcs r7, r7, r4\n\t" + "adc %[ca], %[ca], r6\n\t" "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" - "# i += 1\n\t" + /* i += 1 */ "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, 
r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" "mov r4, #192\n\t" @@ -8676,29 +8681,29 @@ static void sp_3072_mont_sqr_48(sp_digit* r, sp_digit* a, sp_digit* m, SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, const sp_digit b) { - __asm__ __volatile__ ( + asm volatile ( "mov r6, #192\n\t" - "add r6, %[a]\n\t" + "add r6, r6, %[a]\n\t" "mov r8, %[r]\n\t" "mov r9, r6\n\t" "mov r3, #0\n\t" "mov r4, #0\n\t" - "1:\n\t" + "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" - "# A[] * B\n\t" + /* A[] * B */ "ldr r6, [%[a]]\n\t" "umull r6, r7, r6, %[b]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[] * B - Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[] * B - Done */ "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "add %[r], %[r], #4\n\t" + "add %[a], %[a], #4\n\t" "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" "blt 1b\n\t" @@ -8723,37 +8728,37 @@ SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, { sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r6, %[div], 16\n\t\n\t" - "add r6, r6, 1\n\t\n\t" - "udiv r4, %[d1], r6\n\t\n\t" - "lsl r7, r4, 16\n\t\n\t" - "umull r4, r5, %[div], r7\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r5, %[d1], r6\n\t\n\t" - "lsl r4, r5, 16\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r4, %[d0], 
%[div]\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "mov %[r], r7\n\t\n\t" + asm volatile ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r7, r4, #16\n\t" + "umull r4, r5, %[div], r7\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr 16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr 16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r7, r7, r4\n\t" + "mov %[r], r7\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) : "r4", "r5", "r6", "r7" @@ -8773,27 +8778,27 @@ SP_NOINLINE static int32_t sp_3072_cmp_48(sp_digit* a, sp_digit* b) sp_digit r = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mvn r3, r3\n\t" "mov r6, #188\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r3\n\t" - "and r5, r3\n\t" + "and r7, r7, r3\n\t" + "and r5, r5, r3\n\t" "mov r4, r7\n\t" - "subs r7, r5\n\t" - "sbc r7, r7\n\t" - "add %[r], r7\n\t" + "subs r7, r7, r5\n\t" + "sbc r7, r7, r7\n\t" + "add %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "subs r5, r4\n\t" - "sbc r7, r7\n\t" - "sub %[r], r7\n\t" + "and r3, r3, r7\n\t" + "subs r5, r5, r4\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "sub r6, #4\n\t" + "and r3, r3, r7\n\t" + "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) @@ -9127,7 +9132,8 @@ static int sp_3072_mod_exp_48(sp_digit* r, sp_digit* a, sp_digit* e, } #endif /* WOLFSSL_SP_SMALL */ -#endif /* 
!SP_RSA_PRIVATE_EXP_D && WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY */ +#endif /* (WOLFSSL_HAVE_SP_RSA || WOLFSSL_HAVE_SP_DH) && */ + /* !WOLFSSL_RSA_PUBLIC_ONLY */ #ifdef WOLFSSL_HAVE_SP_DH /* r = 2^n mod m where n is the number of bits to reduce by. @@ -9158,22 +9164,22 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r5, #1\n\t" "lsl r5, r5, #8\n\t" - "add r5, #128\n\t" + "add r5, r5, #128\n\t" "mov r8, r5\n\t" "mov r7, #0\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r6, [%[b], r7]\n\t" - "and r6, %[m]\n\t" + "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r5, [%[a], r7]\n\t" - "sbcs r5, r6\n\t" - "sbcs %[c], %[c]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "cmp r7, r8\n\t" "blt 1b\n\t" : [c] "+r" (c) @@ -9195,80 +9201,80 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, sp_digit* m, { sp_digit ca = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" "mov r14, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - "# i = 0\n\t" + /* i = 0 */ "mov r11, r4\n\t" "\n1:\n\t" "mov r5, #0\n\t" "mov %[ca], #0\n\t" - "# mu = a[i] * mp\n\t" + /* mu = a[i] * mp */ "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" - "mul %[mp], %[a]\n\t" + "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r14\n\t" "mov r10, r9\n\t" "\n2:\n\t" - "# a[i+j] += m[j] * mu\n\t" + /* a[i+j] += m[j] * mu */ "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" "mov %[ca], #0\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "# Multiply m[j] and mu - Start\n\t" + /* Multiply m[j] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds %[a], r6\n\t" - "adcs r5, r7\n\t" - "# Multiply m[j] and mu - Done\n\t" - "adds r4, %[a]\n\t" - "adc r5, %[ca]\n\t" + "adds %[a], %[a], r6\n\t" + "adcs r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, %[a]\n\t" + 
"adc r5, r5, %[ca]\n\t" "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" + "add %[m], %[m], #4\n\t" + "add r10, r10, r6\n\t" "mov r4, #1\n\t" "lsl r4, r4, #8\n\t" - "add r4, #124\n\t" - "add r4, r9\n\t" + "add r4, r4, #124\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" "blt 2b\n\t" - "# a[i+95] += m[95] * mu\n\t" + /* a[i+95] += m[95] * mu */ "mov %[ca], #0\n\t" "mov r4, r12\n\t" "mov %[a], #0\n\t" - "# Multiply m[95] and mu - Start\n\t" + /* Multiply m[95] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds r5, r6\n\t" - "adcs r4, r7\n\t" - "adc %[a], %[ca]\n\t" - "# Multiply m[95] and mu - Done\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc %[a], %[a], %[ca]\n\t" + /* Multiply m[95] and mu - Done */ "mov %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" "mov r6, #0\n\t" - "adds r5, %[a]\n\t" - "adcs r7, r4\n\t" - "adc %[ca], r6\n\t" + "adds r5, r5, %[a]\n\t" + "adcs r7, r7, r4\n\t" + "adc %[ca], %[ca], r6\n\t" "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" - "# i += 1\n\t" + /* i += 1 */ "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" "mov r4, #1\n\t" "lsl r4, r4, #8\n\t" - "add r4, #128\n\t" + "add r4, r4, #128\n\t" "cmp r11, r4\n\t" "blt 1b\n\t" "mov %[m], r14\n\t" @@ -9324,37 +9330,37 @@ SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, { sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r6, %[div], 16\n\t\n\t" - "add r6, r6, 1\n\t\n\t" - "udiv r4, %[d1], r6\n\t\n\t" - "lsl r7, r4, 16\n\t\n\t" - "umull r4, r5, %[div], r7\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r5, %[d1], r6\n\t\n\t" - "lsl r4, r5, 16\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 
16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r4, %[d0], %[div]\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "mov %[r], r7\n\t\n\t" + asm volatile ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r7, r4, #16\n\t" + "umull r4, r5, %[div], r7\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr 16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr 16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r7, r7, r4\n\t" + "mov %[r], r7\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) : "r4", "r5", "r6", "r7" @@ -9403,29 +9409,29 @@ SP_NOINLINE static int32_t sp_3072_cmp_96(sp_digit* a, sp_digit* b) sp_digit r = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mvn r3, r3\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, #124\n\t" - "1:\n\t" + "add r6, r6, #124\n\t" + "\n1:\n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r3\n\t" - "and r5, r3\n\t" + "and r7, r7, r3\n\t" + "and r5, r5, r3\n\t" "mov r4, r7\n\t" - "subs r7, r5\n\t" - "sbc r7, r7\n\t" - "add %[r], r7\n\t" + "subs r7, r7, r5\n\t" + "sbc 
r7, r7, r7\n\t" + "add %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "subs r5, r4\n\t" - "sbc r7, r7\n\t" - "sub %[r], r7\n\t" + "and r3, r3, r7\n\t" + "subs r5, r5, r4\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "sub r6, #4\n\t" + "and r3, r3, r7\n\t" + "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) @@ -10163,6 +10169,7 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) return err; } +#ifdef WOLFSSL_HAVE_SP_DH /* Perform the modular exponentiation for Diffie-Hellman. * * base Base. @@ -10210,6 +10217,7 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, return err; } +#endif /* WOLFSSL_HAVE_SP_DH */ /* Perform the modular exponentiation for Diffie-Hellman. * @@ -10333,9 +10341,9 @@ static sp_digit p256_b[8] = { #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) /* Allocate memory for point and return error. */ -#define sp_ecc_point_new(heap, sp, p) \ - ((p = XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC)) == NULL) ? \ - MEMORY_E : MP_OKAY +#define sp_ecc_point_new(heap, sp, p) \ + ((p = (sp_point*)XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC)) \ + == NULL) ? MEMORY_E : MP_OKAY #else /* Set pointer to data and return no error. */ #define sp_ecc_point_new(heap, sp, p) ((p = &sp) == NULL) ? 
MEMORY_E : MP_OKAY @@ -10608,27 +10616,27 @@ SP_NOINLINE static int32_t sp_256_cmp_8(sp_digit* a, sp_digit* b) sp_digit r = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "mvn r3, r3\n\t" "mov r6, #28\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r7, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r3\n\t" - "and r5, r3\n\t" + "and r7, r7, r3\n\t" + "and r5, r5, r3\n\t" "mov r4, r7\n\t" - "subs r7, r5\n\t" - "sbc r7, r7\n\t" - "add %[r], r7\n\t" + "subs r7, r7, r5\n\t" + "sbc r7, r7, r7\n\t" + "add %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "subs r5, r4\n\t" - "sbc r7, r7\n\t" - "sub %[r], r7\n\t" + "and r3, r3, r7\n\t" + "subs r5, r5, r4\n\t" + "sbc r7, r7, r7\n\t" + "sub %[r], %[r], r7\n\t" "mvn r7, r7\n\t" - "and r3, r7\n\t" - "sub r6, #4\n\t" + "and r3, r3, r7\n\t" + "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) @@ -10658,20 +10666,20 @@ SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r5, #32\n\t" "mov r8, r5\n\t" "mov r7, #0\n\t" - "1:\n\t" + "\n1:\n\t" "ldr r6, [%[b], r7]\n\t" - "and r6, %[m]\n\t" + "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r5, [%[a], r7]\n\t" - "sbcs r5, r6\n\t" - "sbcs %[c], %[c]\n\t" + "sbcs r5, r5, r6\n\t" + "sbcs %[c], %[c], %[c]\n\t" "str r5, [%[r], r7]\n\t" - "add r7, #4\n\t" + "add r7, r7, #4\n\t" "cmp r7, r8\n\t" "blt 1b\n\t" : [c] "+r" (c) @@ -10694,88 +10702,88 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, (void)mp; (void)m; - __asm__ __volatile__ ( + asm volatile ( "mov r2, #0\n\t" "mov r1, #0\n\t" - "# i = 0\n\t" + /* i = 0 */ "mov r8, r2\n\t" "\n1:\n\t" "mov r4, #0\n\t" - "# mu = a[i] * 1 (mp) = a[i]\n\t" + /* mu = a[i] * 1 (mp) = a[i] */ "ldr r3, [%[a]]\n\t" - "# a[i] += -1 * mu = -1 * a[i] => a[i] = 0 no carry\n\t" - "# a[i+1] += -1 * mu\n\t" + /* a[i] += -1 * mu = -1 * a[i] => a[i] = 0 no carry */ + /* a[i+1] += -1 * mu 
*/ "ldr r6, [%[a], #4]\n\t" "mov r5, #0\n\t" - "adds r4, r6\n\t" - "adc r5, r2\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r2\n\t" "str r4, [%[a], #4]\n\t" - "# a[i+2] += -1 * mu\n\t" + /* a[i+2] += -1 * mu */ "ldr r6, [%[a], #8]\n\t" "mov r4, #0\n\t" - "adds r5, r6\n\t" - "adc r4, r2\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, r2\n\t" "str r5, [%[a], #8]\n\t" - "# a[i+3] += 0 * mu\n\t" + /* a[i+3] += 0 * mu */ "ldr r6, [%[a], #12]\n\t" "mov r5, #0\n\t" - "adds r4, r3\n\t" - "adc r5, r2\n\t" - "adds r4, r6\n\t" - "adc r5, r2\n\t" + "adds r4, r4, r3\n\t" + "adc r5, r5, r2\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r2\n\t" "str r4, [%[a], #12]\n\t" - "# a[i+4] += 0 * mu\n\t" + /* a[i+4] += 0 * mu */ "ldr r6, [%[a], #16]\n\t" "mov r4, #0\n\t" - "adds r5, r6\n\t" - "adc r4, r2\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, r2\n\t" "str r5, [%[a], #16]\n\t" - "# a[i+5] += 0 * mu\n\t" + /* a[i+5] += 0 * mu */ "ldr r6, [%[a], #20]\n\t" "mov r5, #0\n\t" - "adds r4, r6\n\t" - "adc r5, r2\n\t" + "adds r4, r4, r6\n\t" + "adc r5, r5, r2\n\t" "str r4, [%[a], #20]\n\t" - "# a[i+6] += 1 * mu\n\t" + /* a[i+6] += 1 * mu */ "ldr r6, [%[a], #24]\n\t" "mov r4, #0\n\t" - "adds r5, r3\n\t" - "adc r4, r2\n\t" - "adds r5, r6\n\t" - "adc r4, r2\n\t" + "adds r5, r5, r3\n\t" + "adc r4, r4, r2\n\t" + "adds r5, r5, r6\n\t" + "adc r4, r4, r2\n\t" "str r5, [%[a], #24]\n\t" - "# a[i+7] += -1 * mu\n\t" + /* a[i+7] += -1 * mu */ "ldr r6, [%[a], #28]\n\t" "ldr r7, [%[a], #32]\n\t" "adds r5, r1, r3\n\t" "mov r1, #0\n\t" - "adc r1, r2\n\t" - "subs r4, r3\n\t" - "sbcs r5, r2\n\t" - "sbc r1, r2\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r1, r2\n\t" + "adc r1, r1, r2\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, r2\n\t" + "sbc r1, r1, r2\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r1, r1, r2\n\t" "str r4, [%[a], #28]\n\t" "str r5, [%[a], #32]\n\t" - "# i += 1\n\t" - "add r8, #1\n\t" - "add %[a], #4\n\t" + /* i += 1 */ + "add r8, r8, #1\n\t" + "add %[a], %[a], #4\n\t" "mov r6, 
#8\n\t" "cmp r8, r6\n\t" "blt 1b\n\t" - "sub %[a], #32\n\t" + "sub %[a], %[a], #32\n\t" "mov r3, r1\n\t" - "sub r1, #1\n\t" + "sub r1, r1, #1\n\t" "mvn r1, r1\n\t" "ldr r4, [%[a],#32]\n\t" "ldr r5, [%[a],#36]\n\t" "ldr r6, [%[a],#40]\n\t" "ldr r7, [%[a],#44]\n\t" - "subs r4, r1\n\t" - "sbcs r5, r1\n\t" - "sbcs r6, r1\n\t" - "sbcs r7, r2\n\t" + "subs r4, r4, r1\n\t" + "sbcs r5, r5, r1\n\t" + "sbcs r6, r6, r1\n\t" + "sbcs r7, r7, r2\n\t" "str r4, [%[a],#0]\n\t" "str r5, [%[a],#4]\n\t" "str r6, [%[a],#8]\n\t" @@ -10784,10 +10792,10 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, sp_digit* m, "ldr r5, [%[a],#52]\n\t" "ldr r6, [%[a],#56]\n\t" "ldr r7, [%[a],#60]\n\t" - "sbcs r4, r2\n\t" - "sbcs r5, r2\n\t" - "sbcs r6, r3\n\t" - "sbc r7, r1\n\t" + "sbcs r4, r4, r2\n\t" + "sbcs r5, r5, r2\n\t" + "sbcs r6, r6, r3\n\t" + "sbc r7, r7, r1\n\t" "str r4, [%[a],#16]\n\t" "str r5, [%[a],#20]\n\t" "str r6, [%[a],#24]\n\t" @@ -10813,73 +10821,73 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, sp_digit* m, { sp_digit ca = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[mp]\n\t" "mov r12, %[ca]\n\t" "mov r14, %[m]\n\t" "mov r9, %[a]\n\t" "mov r4, #0\n\t" - "# i = 0\n\t" + /* i = 0 */ "mov r11, r4\n\t" "\n1:\n\t" "mov r5, #0\n\t" "mov %[ca], #0\n\t" - "# mu = a[i] * mp\n\t" + /* mu = a[i] * mp */ "mov %[mp], r8\n\t" "ldr %[a], [%[a]]\n\t" - "mul %[mp], %[a]\n\t" + "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r14\n\t" "mov r10, r9\n\t" "\n2:\n\t" - "# a[i+j] += m[j] * mu\n\t" + /* a[i+j] += m[j] * mu */ "mov %[a], r10\n\t" "ldr %[a], [%[a]]\n\t" "mov %[ca], #0\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "# Multiply m[j] and mu - Start\n\t" + /* Multiply m[j] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds %[a], r6\n\t" - "adcs r5, r7\n\t" - "# Multiply m[j] and mu - Done\n\t" - "adds r4, %[a]\n\t" - "adc r5, %[ca]\n\t" + "adds %[a], %[a], r6\n\t" + "adcs r5, r5, r7\n\t" + /* Multiply m[j] and mu - Done */ + "adds r4, r4, 
%[a]\n\t" + "adc r5, r5, %[ca]\n\t" "mov %[a], r10\n\t" "str r4, [%[a]]\n\t" "mov r6, #4\n\t" - "add %[m], #4\n\t" - "add r10, r6\n\t" + "add %[m], %[m], #4\n\t" + "add r10, r10, r6\n\t" "mov r4, #28\n\t" - "add r4, r9\n\t" + "add r4, r4, r9\n\t" "cmp r10, r4\n\t" "blt 2b\n\t" - "# a[i+7] += m[7] * mu\n\t" + /* a[i+7] += m[7] * mu */ "mov %[ca], #0\n\t" "mov r4, r12\n\t" "mov %[a], #0\n\t" - "# Multiply m[7] and mu - Start\n\t" + /* Multiply m[7] and mu - Start */ "ldr r7, [%[m]]\n\t" "umull r6, r7, %[mp], r7\n\t" - "adds r5, r6\n\t" - "adcs r4, r7\n\t" - "adc %[a], %[ca]\n\t" - "# Multiply m[7] and mu - Done\n\t" + "adds r5, r5, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc %[a], %[a], %[ca]\n\t" + /* Multiply m[7] and mu - Done */ "mov %[ca], %[a]\n\t" "mov %[a], r10\n\t" "ldr r7, [%[a], #4]\n\t" "ldr %[a], [%[a]]\n\t" "mov r6, #0\n\t" - "adds r5, %[a]\n\t" - "adcs r7, r4\n\t" - "adc %[ca], r6\n\t" + "adds r5, r5, %[a]\n\t" + "adcs r7, r7, r4\n\t" + "adc %[ca], %[ca], r6\n\t" "mov %[a], r10\n\t" "str r5, [%[a]]\n\t" "str r7, [%[a], #4]\n\t" - "# i += 1\n\t" + /* i += 1 */ "mov r6, #4\n\t" - "add r9, r6\n\t" - "add r11, r6\n\t" + "add r9, r9, r6\n\t" + "add r11, r11, r6\n\t" "mov r12, %[ca]\n\t" "mov %[a], r9\n\t" "mov r4, #32\n\t" @@ -10904,500 +10912,500 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit tmp[8]; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[r]\n\t" "mov %[r], #0\n\t" - "# A[0] * B[0]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 0]\n\t" + /* A[0] * B[0] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r3, r4, r6, r7\n\t" "mov r5, #0\n\t" - "str r3, [%[tmp], 0]\n\t" + "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" - "# A[0] * B[1]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 4]\n\t" + /* A[0] * B[1] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[0]\n\t" - "ldr r6, 
[%[a], 4]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[0] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" - "# A[0] * B[2]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 8]\n\t" + /* A[0] * B[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * B[1]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * B[1] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[0]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[0] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" - "# A[0] * B[3]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 12]\n\t" + /* A[0] * B[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * B[2]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * B[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * B[1]\n\t" - "ldr 
r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * B[1] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[0]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[0] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" - "# A[0] * B[4]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 16]\n\t" + /* A[0] * B[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[3]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * B[2]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * B[2] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * B[1]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * B[1] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[0]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, 
r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[0] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" - "# A[0] * B[5]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 20]\n\t" + /* A[0] * B[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * B[4]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * B[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[3]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * B[2]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * B[2] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * B[1]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * B[1] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[0]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" 
+ /* A[5] * B[0] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" - "# A[0] * B[6]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 24]\n\t" + /* A[0] * B[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * B[5]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * B[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * B[4]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * B[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[3]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[3] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * B[2]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * B[2] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[5] * B[1]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[5] * B[1] */ + "ldr r6, [%[a], #20]\n\t" + "ldr 
r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[0]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[0] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" - "# A[0] * B[7]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[0] * B[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * B[6]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * B[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * B[5]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * B[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * B[4]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * B[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[3]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[3] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - 
"adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * B[2]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * B[2] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[6] * B[1]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[6] * B[1] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[0]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 0]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[0] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #0]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 28]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" - "# A[1] * B[7]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[1] * B[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * B[6]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * B[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * B[5]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * B[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, 
%[r]\n\t" - "# A[4] * B[4]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * B[4] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[3]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[3] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[6] * B[2]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[6] * B[2] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[7] * B[1]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[7] * B[1] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 32]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #32]\n\t" "mov r5, #0\n\t" - "# A[2] * B[7]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[2] * B[7] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * B[6]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * B[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * B[5]\n\t" - "ldr r6, [%[a], 16]\n\t" 
- "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * B[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[5] * B[4]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[5] * B[4] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[3]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[3] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[7] * B[2]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[7] * B[2] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 36]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #36]\n\t" "mov r3, #0\n\t" - "# A[3] * B[7]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[3] * B[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * B[6]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * B[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * B[5]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r4, r4, r6\n\t" + 
"adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * B[5] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[6] * B[4]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[6] * B[4] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[3]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[3] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 40]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #40]\n\t" "mov r4, #0\n\t" - "# A[4] * B[7]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[4] * B[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * B[6]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * B[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[6] * B[5]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[6] * B[5] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[7] * B[4]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* 
A[7] * B[4] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 44]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #44]\n\t" "mov r5, #0\n\t" - "# A[5] * B[7]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[5] * B[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * B[6]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * B[6] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[7] * B[5]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[7] * B[5] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 48]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #48]\n\t" "mov r3, #0\n\t" - "# A[6] * B[7]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[6] * B[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[7] * B[6]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[7] * B[6] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 52]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #52]\n\t" "mov r4, 
#0\n\t" - "# A[7] * B[7]\n\t" - "ldr r6, [%[a], 28]\n\t" - "ldr r7, [%[b], 28]\n\t" + /* A[7] * B[7] */ + "ldr r6, [%[a], #28]\n\t" + "ldr r7, [%[b], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "str r5, [r8, 56]\n\t" - "str r3, [r8, 60]\n\t" - "ldr r3, [%[tmp], 0]\n\t" - "ldr r4, [%[tmp], 4]\n\t" - "ldr r5, [%[tmp], 8]\n\t" - "ldr r6, [%[tmp], 12]\n\t" - "str r3, [r8, 0]\n\t" - "str r4, [r8, 4]\n\t" - "str r5, [r8, 8]\n\t" - "str r6, [r8, 12]\n\t" - "ldr r3, [%[tmp], 16]\n\t" - "ldr r4, [%[tmp], 20]\n\t" - "ldr r5, [%[tmp], 24]\n\t" - "ldr r6, [%[tmp], 28]\n\t" - "str r3, [r8, 16]\n\t" - "str r4, [r8, 20]\n\t" - "str r5, [r8, 24]\n\t" - "str r6, [r8, 28]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "str r5, [r8, #56]\n\t" + "str r3, [r8, #60]\n\t" + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [r8, #0]\n\t" + "str r4, [r8, #4]\n\t" + "str r5, [r8, #8]\n\t" + "str r6, [r8, #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [r8, #16]\n\t" + "str r4, [r8, #20]\n\t" + "str r5, [r8, #24]\n\t" + "str r6, [r8, #28]\n\t" "mov %[r], r8\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) @@ -11429,380 +11437,380 @@ static void sp_256_mont_mul_8(sp_digit* r, sp_digit* a, sp_digit* b, SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) { sp_digit tmp[8]; - __asm__ __volatile__ ( + asm volatile ( "mov r8, %[r]\n\t" "mov %[r], #0\n\t" - "# A[0] * A[0]\n\t" - "ldr r6, [%[a], 0]\n\t" + /* A[0] * A[0] */ + "ldr r6, [%[a], #0]\n\t" "umull r3, r4, r6, r6\n\t" "mov r5, #0\n\t" - "str r3, [%[tmp], 0]\n\t" + "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" - "# A[0] * A[1]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 4]\n\t" + /* A[0] * A[1] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #4]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - 
"adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 4]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" - "# A[0] * A[2]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 8]\n\t" + /* A[0] * A[2] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * A[1]\n\t" - "ldr r6, [%[a], 4]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * A[1] */ + "ldr r6, [%[a], #4]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 8]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" - "# A[0] * A[3]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 12]\n\t" + /* A[0] * A[3] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * A[2]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 8]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * A[2] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #8]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
%[r]\n\t" + "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" - "# A[0] * A[4]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 16]\n\t" + /* A[0] * A[4] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * A[3]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 12]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * A[3] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * A[2]\n\t" - "ldr r6, [%[a], 8]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * A[2] */ + "ldr r6, [%[a], #8]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [%[tmp], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" - "# A[0] * A[5]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 20]\n\t" + /* A[0] * A[5] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[1] * A[4]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[1] * A[4] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, 
r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * A[3]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 12]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * A[3] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [%[tmp], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" - "# A[0] * A[6]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 24]\n\t" + /* A[0] * A[6] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[1] * A[5]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[1] * A[5] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[2] * A[4]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[2] * A[4] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * A[3]\n\t" - "ldr r6, [%[a], 12]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, 
%[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * A[3] */ + "ldr r6, [%[a], #12]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [%[tmp], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" - "# A[0] * A[7]\n\t" - "ldr r6, [%[a], 0]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[0] * A[7] */ + "ldr r6, [%[a], #0]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[1] * A[6]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[1] * A[6] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[2] * A[5]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[2] * A[5] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[3] * A[4]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 16]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[3] * A[4] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #16]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, 
%[r]\n\t" - "str r4, [%[tmp], 28]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" - "# A[1] * A[7]\n\t" - "ldr r6, [%[a], 4]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[1] * A[7] */ + "ldr r6, [%[a], #4]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[2] * A[6]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[2] * A[6] */ + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[3] * A[5]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[3] * A[5] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[4] * A[4]\n\t" - "ldr r6, [%[a], 16]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[4] * A[4] */ + "ldr r6, [%[a], #16]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 32]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #32]\n\t" "mov r5, #0\n\t" - "# A[2] * A[7]\n\t" - "ldr r6, [%[a], 8]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[2] * A[7] */ + "ldr 
r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[3] * A[6]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[3] * A[6] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[4] * A[5]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 20]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[4] * A[5] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #20]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 36]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #36]\n\t" "mov r3, #0\n\t" - "# A[3] * A[7]\n\t" - "ldr r6, [%[a], 12]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[3] * A[7] */ + "ldr r6, [%[a], #12]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[4] * A[6]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[4] * A[6] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds 
r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "# A[5] * A[5]\n\t" - "ldr r6, [%[a], 20]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + /* A[5] * A[5] */ + "ldr r6, [%[a], #20]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 40]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #40]\n\t" "mov r4, #0\n\t" - "# A[4] * A[7]\n\t" - "ldr r6, [%[a], 16]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[4] * A[7] */ + "ldr r6, [%[a], #16]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "# A[5] * A[6]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[a], 24]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + /* A[5] * A[6] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #24]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "adc r4, %[r]\n\t" - "str r5, [r8, 44]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "adc r4, r4, %[r]\n\t" + "str r5, [r8, #44]\n\t" "mov r5, #0\n\t" - "# A[5] * A[7]\n\t" - "ldr r6, [%[a], 20]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[5] * A[7] */ + "ldr r6, [%[a], #20]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[6] * A[6]\n\t" - "ldr r6, [%[a], 24]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[6] * A[6] 
*/ + "ldr r6, [%[a], #24]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "str r3, [r8, 48]\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + "str r3, [r8, #48]\n\t" "mov r3, #0\n\t" - "# A[6] * A[7]\n\t" - "ldr r6, [%[a], 24]\n\t" - "ldr r7, [%[a], 28]\n\t" + /* A[6] * A[7] */ + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" "umull r6, r7, r6, r7\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" - "adc r3, %[r]\n\t" - "str r4, [r8, 52]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" + "adc r3, r3, %[r]\n\t" + "str r4, [r8, #52]\n\t" "mov r4, #0\n\t" - "# A[7] * A[7]\n\t" - "ldr r6, [%[a], 28]\n\t" + /* A[7] * A[7] */ + "ldr r6, [%[a], #28]\n\t" "umull r6, r7, r6, r6\n\t" - "adds r5, r6\n\t" - "adcs r3, r7\n\t" - "str r5, [r8, 56]\n\t" - "str r3, [r8, 60]\n\t" - "ldr r3, [%[tmp], 0]\n\t" - "ldr r4, [%[tmp], 4]\n\t" - "ldr r5, [%[tmp], 8]\n\t" - "ldr r6, [%[tmp], 12]\n\t" - "str r3, [r8, 0]\n\t" - "str r4, [r8, 4]\n\t" - "str r5, [r8, 8]\n\t" - "str r6, [r8, 12]\n\t" - "ldr r3, [%[tmp], 16]\n\t" - "ldr r4, [%[tmp], 20]\n\t" - "ldr r5, [%[tmp], 24]\n\t" - "ldr r6, [%[tmp], 28]\n\t" - "str r3, [r8, 16]\n\t" - "str r4, [r8, 20]\n\t" - "str r5, [r8, 24]\n\t" - "str r6, [r8, 28]\n\t" + "adds r5, r5, r6\n\t" + "adcs r3, r3, r7\n\t" + "str r5, [r8, #56]\n\t" + "str r3, [r8, #60]\n\t" + "ldr r3, [%[tmp], #0]\n\t" + "ldr r4, [%[tmp], #4]\n\t" + "ldr r5, [%[tmp], #8]\n\t" + "ldr r6, [%[tmp], #12]\n\t" + "str r3, [r8, #0]\n\t" + "str r4, [r8, #4]\n\t" + "str r5, [r8, #8]\n\t" + "str r6, [r8, #12]\n\t" + "ldr r3, [%[tmp], #16]\n\t" + "ldr r4, [%[tmp], #20]\n\t" + "ldr r5, [%[tmp], #24]\n\t" + "ldr r6, [%[tmp], #28]\n\t" + "str r3, [r8, #16]\n\t" + "str r4, [r8, #20]\n\t" + "str r5, [r8, #24]\n\t" + "str r6, [r8, #28]\n\t" "mov %[r], r8\n\t" : : [r] "r" (r), [a] 
"r" (a), [tmp] "r" (tmp) @@ -11824,7 +11832,7 @@ static void sp_256_mont_sqr_8(sp_digit* r, sp_digit* a, sp_digit* m, sp_256_mont_reduce_8(r, m, mp); } -#ifndef WOLFSSL_SP_SMALL +#if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY) /* Square the Montgomery form number a number of times. (r = a ^ n mod m) * * r Result of squaring. @@ -11841,7 +11849,8 @@ static void sp_256_mont_sqr_n_8(sp_digit* r, sp_digit* a, int n, sp_256_mont_sqr_8(r, r, m, mp); } -#else +#endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */ +#ifdef WOLFSSL_SP_SMALL /* Mod-2 for the P256 curve. */ static const uint32_t p256_mod_2[8] = { 0xfffffffd,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000, @@ -11975,22 +11984,22 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r6, %[a]\n\t" "mov r7, #0\n\t" - "add r6, #32\n\t" - "sub r7, #1\n\t" + "add r6, r6, #32\n\t" + "sub r7, r7, #1\n\t" "\n1:\n\t" - "adds %[c], r7\n\t" + "adds %[c], %[c], r7\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r]]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "adc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -12013,41 +12022,41 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[b], #0]\n\t" - "adds r4, r5\n\t" + "adds r4, r4, r5\n\t" "str r4, [%[r], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #4]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[b], #8]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #12]\n\t" 
- "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[b], #16]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #20]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[b], #24]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #28]\n\t" - "adcs r4, r5\n\t" + "adcs r4, r4, r5\n\t" "str r4, [%[r], #28]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c]\n\t" + "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5" @@ -12069,67 +12078,67 @@ SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, sp_digit* a, sp_digit* b, { (void)m; - __asm__ __volatile__ ( + asm volatile ( "mov r3, #0\n\t" "ldr r4, [%[a],#0]\n\t" "ldr r5, [%[a],#4]\n\t" "ldr r6, [%[b],#0]\n\t" "ldr r7, [%[b],#4]\n\t" - "adds r4, r6\n\t" - "adcs r5, r7\n\t" + "adds r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "ldr r4, [%[a],#8]\n\t" "ldr r5, [%[a],#12]\n\t" "ldr r6, [%[b],#8]\n\t" "ldr r7, [%[b],#12]\n\t" - "adcs r4, r6\n\t" - "adcs r5, r7\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" "ldr r4, [%[a],#16]\n\t" "ldr r5, [%[a],#20]\n\t" "ldr r6, [%[b],#16]\n\t" "ldr r7, [%[b],#20]\n\t" - "adcs r4, r6\n\t" - "adcs r5, r7\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" "mov r8, r4\n\t" "mov r9, r5\n\t" "ldr r4, [%[a],#24]\n\t" "ldr r5, [%[a],#28]\n\t" "ldr r6, [%[b],#24]\n\t" "ldr r7, [%[b],#28]\n\t" - "adcs r4, r6\n\t" - "adcs r5, r7\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r7\n\t" "mov r10, r4\n\t" "mov r11, r5\n\t" - "adc r3, r3\n\t" + "adc r3, r3, r3\n\t" "mov r6, r3\n\t" - "sub r3, #1\n\t" + "sub r3, r3, #1\n\t" "mvn r3, r3\n\t" "mov r7, #0\n\t" "ldr r4, [%[r],#0]\n\t" "ldr r5, [%[r],#4]\n\t" - "subs r4, 
r3\n\t" - "sbcs r5, r3\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, r3\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "ldr r4, [%[r],#8]\n\t" "ldr r5, [%[r],#12]\n\t" - "sbcs r4, r3\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r3\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" "mov r4, r8\n\t" "mov r5, r9\n\t" - "sbcs r4, r7\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r7\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r],#16]\n\t" "str r5, [%[r],#20]\n\t" "mov r4, r10\n\t" "mov r5, r11\n\t" - "sbcs r4, r6\n\t" - "sbc r5, r3\n\t" + "sbcs r4, r4, r6\n\t" + "sbc r5, r5, r3\n\t" "str r4, [%[r],#24]\n\t" "str r5, [%[r],#28]\n\t" : @@ -12148,15 +12157,15 @@ SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, sp_digit* a, sp_digit* m) { (void)m; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a],#0]\n\t" "ldr r5, [%[a],#4]\n\t" "ldr r6, [%[a],#8]\n\t" "ldr r7, [%[a],#12]\n\t" - "adds r4, r4\n\t" - "adcs r5, r5\n\t" - "adcs r6, r6\n\t" - "adcs r7, r7\n\t" + "adds r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "str r6, [%[r],#8]\n\t" @@ -12165,42 +12174,42 @@ SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, sp_digit* a, sp_digit* m) "ldr r5, [%[a],#20]\n\t" "ldr r6, [%[a],#24]\n\t" "ldr r7, [%[a],#28]\n\t" - "adcs r4, r4\n\t" - "adcs r5, r5\n\t" - "adcs r6, r6\n\t" - "adcs r7, r7\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" + "adcs r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" "mov r8, r4\n\t" "mov r9, r5\n\t" "mov r10, r6\n\t" "mov r11, r7\n\t" "mov r3, #0\n\t" "mov r7, #0\n\t" - "adc r3, r3\n\t" + "adc r3, r3, r3\n\t" "mov r2, r3\n\t" - "sub r3, #1\n\t" + "sub r3, r3, #1\n\t" "mvn r3, r3\n\t" "ldr r4, [%[r],#0]\n\t" "ldr r5, [%[r],#4]\n\t" "ldr r6, [%[r],#8]\n\t" - "subs r4, r3\n\t" - "sbcs r5, r3\n\t" - "sbcs r6, r3\n\t" + "subs r4, r4, r3\n\t" + "sbcs r5, r5, r3\n\t" + "sbcs r6, r6, r3\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "str 
r6, [%[r],#8]\n\t" "ldr r4, [%[r],#12]\n\t" "mov r5, r8\n\t" "mov r6, r9\n\t" - "sbcs r4, r7\n\t" - "sbcs r5, r7\n\t" - "sbcs r6, r7\n\t" + "sbcs r4, r4, r7\n\t" + "sbcs r5, r5, r7\n\t" + "sbcs r6, r6, r7\n\t" "str r4, [%[r],#12]\n\t" "str r5, [%[r],#16]\n\t" "str r6, [%[r],#20]\n\t" "mov r4, r10\n\t" "mov r5, r11\n\t" - "sbcs r4, r2\n\t" - "sbc r5, r3\n\t" + "sbcs r4, r4, r2\n\t" + "sbc r5, r5, r3\n\t" "str r4, [%[r],#24]\n\t" "str r5, [%[r],#28]\n\t" : @@ -12219,110 +12228,110 @@ SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, sp_digit* a, sp_digit* m) { (void)m; - __asm__ __volatile__ ( + asm volatile ( "ldr r6, [%[a],#0]\n\t" "ldr r7, [%[a],#4]\n\t" "ldr r4, [%[a],#8]\n\t" "ldr r5, [%[a],#12]\n\t" - "adds r6, r6\n\t" - "adcs r7, r7\n\t" - "adcs r4, r4\n\t" - "adcs r5, r5\n\t" + "adds r6, r6, r6\n\t" + "adcs r7, r7, r7\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" "mov r8, r4\n\t" "mov r9, r5\n\t" "ldr r2, [%[a],#16]\n\t" "ldr r3, [%[a],#20]\n\t" "ldr r4, [%[a],#24]\n\t" "ldr r5, [%[a],#28]\n\t" - "adcs r2, r2\n\t" - "adcs r3, r3\n\t" - "adcs r4, r4\n\t" - "adcs r5, r5\n\t" + "adcs r2, r2, r2\n\t" + "adcs r3, r3, r3\n\t" + "adcs r4, r4, r4\n\t" + "adcs r5, r5, r5\n\t" "mov r10, r2\n\t" "mov r11, r3\n\t" "mov r12, r4\n\t" "mov r14, r5\n\t" "mov r3, #0\n\t" "mov r5, #0\n\t" - "adc r3, r3\n\t" + "adc r3, r3, r3\n\t" "mov r4, r3\n\t" - "sub r3, #1\n\t" + "sub r3, r3, #1\n\t" "mvn r3, r3\n\t" - "subs r6, r3\n\t" - "sbcs r7, r3\n\t" + "subs r6, r6, r3\n\t" + "sbcs r7, r7, r3\n\t" "mov r2, r8\n\t" - "sbcs r2, r3\n\t" + "sbcs r2, r2, r3\n\t" "mov r8, r2\n\t" "mov r2, r9\n\t" - "sbcs r2, r5\n\t" + "sbcs r2, r2, r5\n\t" "mov r9, r2\n\t" "mov r2, r10\n\t" - "sbcs r2, r5\n\t" + "sbcs r2, r2, r5\n\t" "mov r10, r2\n\t" "mov r2, r11\n\t" - "sbcs r2, r5\n\t" + "sbcs r2, r2, r5\n\t" "mov r11, r2\n\t" "mov r2, r12\n\t" - "sbcs r2, r4\n\t" + "sbcs r2, r2, r4\n\t" "mov r12, r2\n\t" "mov r2, r14\n\t" - "sbc r2, r3\n\t" + "sbc r2, r2, r3\n\t" "mov r14, r2\n\t" "ldr r2, 
[%[a],#0]\n\t" "ldr r3, [%[a],#4]\n\t" - "adds r6, r2\n\t" - "adcs r7, r3\n\t" + "adds r6, r6, r2\n\t" + "adcs r7, r7, r3\n\t" "ldr r2, [%[a],#8]\n\t" "ldr r3, [%[a],#12]\n\t" "mov r4, r8\n\t" "mov r5, r9\n\t" - "adcs r2, r4\n\t" - "adcs r3, r5\n\t" + "adcs r2, r2, r4\n\t" + "adcs r3, r3, r5\n\t" "mov r8, r2\n\t" "mov r9, r3\n\t" "ldr r2, [%[a],#16]\n\t" "ldr r3, [%[a],#20]\n\t" "mov r4, r10\n\t" "mov r5, r11\n\t" - "adcs r2, r4\n\t" - "adcs r3, r5\n\t" + "adcs r2, r2, r4\n\t" + "adcs r3, r3, r5\n\t" "mov r10, r2\n\t" "mov r11, r3\n\t" "ldr r2, [%[a],#24]\n\t" "ldr r3, [%[a],#28]\n\t" "mov r4, r12\n\t" "mov r5, r14\n\t" - "adcs r2, r4\n\t" - "adcs r3, r5\n\t" + "adcs r2, r2, r4\n\t" + "adcs r3, r3, r5\n\t" "mov r12, r2\n\t" "mov r14, r3\n\t" "mov r3, #0\n\t" "mov r5, #0\n\t" - "adc r3, r3\n\t" + "adc r3, r3, r3\n\t" "mov r4, r3\n\t" - "sub r3, #1\n\t" + "sub r3, r3, #1\n\t" "mvn r3, r3\n\t" - "subs r6, r3\n\t" + "subs r6, r6, r3\n\t" "str r6, [%[r],#0]\n\t" - "sbcs r7, r3\n\t" + "sbcs r7, r7, r3\n\t" "str r7, [%[r],#4]\n\t" "mov r2, r8\n\t" - "sbcs r2, r3\n\t" + "sbcs r2, r2, r3\n\t" "str r2, [%[r],#8]\n\t" "mov r2, r9\n\t" - "sbcs r2, r5\n\t" + "sbcs r2, r2, r5\n\t" "str r2, [%[r],#12]\n\t" "mov r2, r10\n\t" - "sbcs r2, r5\n\t" + "sbcs r2, r2, r5\n\t" "str r2, [%[r],#16]\n\t" "mov r2, r11\n\t" - "sbcs r2, r5\n\t" + "sbcs r2, r2, r5\n\t" "str r2, [%[r],#20]\n\t" "mov r2, r12\n\t" - "sbcs r2, r4\n\t" + "sbcs r2, r2, r4\n\t" "str r2, [%[r],#24]\n\t" "mov r2, r14\n\t" - "sbc r2, r3\n\t" + "sbc r2, r2, r3\n\t" "str r2, [%[r],#28]\n\t" : : [r] "r" (r), [a] "r" (a) @@ -12342,64 +12351,64 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, sp_digit* a, sp_digit* b, { (void)m; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a],#0]\n\t" "ldr r5, [%[a],#4]\n\t" "ldr r6, [%[b],#0]\n\t" "ldr r7, [%[b],#4]\n\t" - "subs r4, r6\n\t" - "sbcs r5, r7\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "ldr r4, 
[%[a],#8]\n\t" "ldr r5, [%[a],#12]\n\t" "ldr r6, [%[b],#8]\n\t" "ldr r7, [%[b],#12]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" "ldr r4, [%[a],#16]\n\t" "ldr r5, [%[a],#20]\n\t" "ldr r6, [%[b],#16]\n\t" "ldr r7, [%[b],#20]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "mov r8, r4\n\t" "mov r9, r5\n\t" "ldr r4, [%[a],#24]\n\t" "ldr r5, [%[a],#28]\n\t" "ldr r6, [%[b],#24]\n\t" "ldr r7, [%[b],#28]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "mov r10, r4\n\t" "mov r11, r5\n\t" - "sbc r3, r3\n\t" + "sbc r3, r3, r3\n\t" "lsr r7, r3, #31\n\t" "mov r6, #0\n\t" "ldr r4, [%[r],#0]\n\t" "ldr r5, [%[r],#4]\n\t" - "adds r4, r3\n\t" - "adcs r5, r3\n\t" + "adds r4, r4, r3\n\t" + "adcs r5, r5, r3\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "ldr r4, [%[r],#8]\n\t" "ldr r5, [%[r],#12]\n\t" - "adcs r4, r3\n\t" - "adcs r5, r6\n\t" + "adcs r4, r4, r3\n\t" + "adcs r5, r5, r6\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" "mov r4, r8\n\t" "mov r5, r9\n\t" - "adcs r4, r6\n\t" - "adcs r5, r6\n\t" + "adcs r4, r4, r6\n\t" + "adcs r5, r5, r6\n\t" "str r4, [%[r],#16]\n\t" "str r5, [%[r],#20]\n\t" "mov r4, r10\n\t" "mov r5, r11\n\t" - "adcs r4, r7\n\t" - "adc r5, r3\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, r3\n\t" "str r4, [%[r],#24]\n\t" "str r5, [%[r],#28]\n\t" : @@ -12416,45 +12425,45 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, sp_digit* a, sp_digit* b, */ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m) { - __asm__ __volatile__ ( + asm volatile ( "ldr r7, [%[a], #0]\n\t" "lsl r7, r7, #31\n\t" "lsr r7, r7, #31\n\t" "mov r5, #0\n\t" - "sub r5, r7\n\t" + "sub r5, r5, r7\n\t" "mov r7, #0\n\t" "lsl r6, r5, #31\n\t" "lsr r6, r6, #31\n\t" "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" - "adds r3, r5\n\t" - "adcs r4, r5\n\t" + "adds r3, r3, 
r5\n\t" + "adcs r4, r4, r5\n\t" "str r3, [%[r], #0]\n\t" "str r4, [%[r], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" - "adcs r3, r5\n\t" - "adcs r4, r7\n\t" + "adcs r3, r3, r5\n\t" + "adcs r4, r4, r7\n\t" "str r3, [%[r], #8]\n\t" "str r4, [%[r], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" - "adcs r3, r7\n\t" - "adcs r4, r7\n\t" + "adcs r3, r3, r7\n\t" + "adcs r4, r4, r7\n\t" "str r3, [%[r], #16]\n\t" "str r4, [%[r], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" - "adcs r3, r6\n\t" - "adcs r4, r5\n\t" - "adc r7, r7\n\t" + "adcs r3, r3, r6\n\t" + "adcs r4, r4, r5\n\t" + "adc r7, r7, r7\n\t" "lsl r7, r7, #31\n\t" "lsr r5, r3, #1\n\t" "lsl r3, r3, #31\n\t" "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r7\n\t" "mov r7, r3\n\t" "str r5, [%[r], #24]\n\t" "str r6, [%[r], #28]\n\t" @@ -12464,8 +12473,8 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m) "lsl r3, r3, #31\n\t" "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r7\n\t" "mov r7, r3\n\t" "str r5, [%[r], #16]\n\t" "str r6, [%[r], #20]\n\t" @@ -12475,8 +12484,8 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m) "lsl r3, r3, #31\n\t" "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r7\n\t" "mov r7, r3\n\t" "str r5, [%[r], #8]\n\t" "str r6, [%[r], #12]\n\t" @@ -12485,8 +12494,8 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, sp_digit* a, sp_digit* m) "lsr r5, r3, #1\n\t" "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" - "orr r5, r4\n\t" - "orr r6, r7\n\t" + "orr r5, r5, r4\n\t" + "orr r6, r6, r7\n\t" "str r5, [%[r], #0]\n\t" "str r6, [%[r], #4]\n\t" : @@ -12580,20 +12589,20 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm 
volatile ( "mov r6, %[a]\n\t" - "add r6, #32\n\t" + "add r6, r6, #32\n\t" "\n1:\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" - "sbcs r4, r5\n\t" + "sbcs r4, r4, r5\n\t" "str r4, [%[r]]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #4\n\t" - "add %[b], #4\n\t" - "add %[r], #4\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #4\n\t" + "add %[b], %[b], #4\n\t" + "add %[r], %[r], #4\n\t" "cmp %[a], r6\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) @@ -12616,40 +12625,40 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[a], #4]\n\t" "ldr r6, [%[b], #0]\n\t" "ldr r7, [%[b], #4]\n\t" - "subs r4, r6\n\t" - "sbcs r5, r7\n\t" + "subs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #0]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" "ldr r7, [%[b], #12]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" "ldr r7, [%[b], #20]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" "ldr r7, [%[b], #28]\n\t" - "sbcs r4, r6\n\t" - "sbcs r5, r7\n\t" + "sbcs r4, r4, r6\n\t" + "sbcs r5, r5, r7\n\t" "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : : "memory", "r4", "r5", "r6", "r7" @@ -13267,7 +13276,7 @@ static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache) if (!sp_cache[i].set) continue; - if (sp_256_cmp_equal_8(g->x, sp_cache[i].x) & + if 
(sp_256_cmp_equal_8(g->x, sp_cache[i].x) & sp_256_cmp_equal_8(g->y, sp_cache[i].y)) { sp_cache[i].cnt++; break; @@ -13556,7 +13565,7 @@ static void sp_ecc_get_cache(sp_point* g, sp_cache_t** cache) if (!sp_cache[i].set) continue; - if (sp_256_cmp_equal_8(g->x, sp_cache[i].x) & + if (sp_256_cmp_equal_8(g->x, sp_cache[i].x) & sp_256_cmp_equal_8(g->y, sp_cache[i].y)) { sp_cache[i].cnt++; break; @@ -13671,7 +13680,8 @@ int sp_ecc_mulmod_256(mp_int* km, ecc_point* gm, ecc_point* r, int map, err = sp_ecc_point_new(heap, p, point); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC); + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; } @@ -15388,7 +15398,8 @@ int sp_ecc_mulmod_base_256(mp_int* km, ecc_point* r, int map, void* heap) err = sp_ecc_point_new(heap, p, point); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC); + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; } @@ -15432,32 +15443,32 @@ static int sp_256_iszero_8(const sp_digit* a) */ SP_NOINLINE static void sp_256_add_one_8(sp_digit* a) { - __asm__ __volatile__ ( + asm volatile ( "mov r2, #1\n\t" "ldr r1, [%[a], #0]\n\t" - "adds r1, r2\n\t" + "adds r1, r1, r2\n\t" "mov r2, #0\n\t" "str r1, [%[a], #0]\n\t" "ldr r1, [%[a], #4]\n\t" - "adcs r1, r2\n\t" + "adcs r1, r1, r2\n\t" "str r1, [%[a], #4]\n\t" "ldr r1, [%[a], #8]\n\t" - "adcs r1, r2\n\t" + "adcs r1, r1, r2\n\t" "str r1, [%[a], #8]\n\t" "ldr r1, [%[a], #12]\n\t" - "adcs r1, r2\n\t" + "adcs r1, r1, r2\n\t" "str r1, [%[a], #12]\n\t" "ldr r1, [%[a], #16]\n\t" - "adcs r1, r2\n\t" + "adcs r1, r1, r2\n\t" "str r1, [%[a], #16]\n\t" "ldr r1, [%[a], #20]\n\t" - "adcs r1, r2\n\t" + "adcs r1, r1, r2\n\t" "str r1, [%[a], #20]\n\t" "ldr r1, [%[a], #24]\n\t" - "adcs r1, r2\n\t" + 
"adcs r1, r1, r2\n\t" "str r1, [%[a], #24]\n\t" "ldr r1, [%[a], #28]\n\t" - "adcs r1, r2\n\t" + "adcs r1, r1, r2\n\t" "str r1, [%[a], #28]\n\t" : : [a] "r" (a) @@ -15555,7 +15566,8 @@ int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap) #endif #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC); + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; } @@ -15657,7 +15669,8 @@ int sp_ecc_secret_gen_256(mp_int* priv, ecc_point* pub, byte* out, err = sp_ecc_point_new(heap, p, point); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - k = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC); + k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); if (k == NULL) err = MEMORY_E; } @@ -15698,23 +15711,23 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "mov r7, %[a]\n\t" - "add r7, #32\n\t" + "add r7, r7, #32\n\t" "\n1:\n\t" "mov r5, #0\n\t" - "subs r5, %[c]\n\t" + "subs r5, r5, %[c]\n\t" "ldr r3, [%[a]]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b]]\n\t" "ldr r6, [%[b], #4]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a]]\n\t" "str r4, [%[a], #4]\n\t" - "sbc %[c], %[c]\n\t" - "add %[a], #8\n\t" - "add %[b], #8\n\t" + "sbc %[c], %[c], %[c]\n\t" + "add %[a], %[a], #8\n\t" + "add %[b], %[b], #8\n\t" "cmp %[a], r7\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) @@ -15737,40 +15750,40 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, { sp_digit c = 0; - __asm__ __volatile__ ( + asm volatile ( "ldr r3, [%[a], #0]\n\t" "ldr r4, [%[a], #4]\n\t" "ldr r5, [%[b], #0]\n\t" "ldr r6, [%[b], #4]\n\t" - "subs r3, r5\n\t" - "sbcs r4, r6\n\t" + "subs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" 
"str r3, [%[a], #0]\n\t" "str r4, [%[a], #4]\n\t" "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "ldr r5, [%[b], #8]\n\t" "ldr r6, [%[b], #12]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #8]\n\t" "str r4, [%[a], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" "ldr r5, [%[b], #16]\n\t" "ldr r6, [%[b], #20]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #16]\n\t" "str r4, [%[a], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "ldr r5, [%[b], #24]\n\t" "ldr r6, [%[b], #28]\n\t" - "sbcs r3, r5\n\t" - "sbcs r4, r6\n\t" + "sbcs r3, r3, r5\n\t" + "sbcs r4, r4, r6\n\t" "str r3, [%[a], #24]\n\t" "str r4, [%[a], #28]\n\t" - "sbc %[c], %[c]\n\t" + "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : : "memory", "r3", "r4", "r5", "r6" @@ -15789,29 +15802,29 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, const sp_digit b) { - __asm__ __volatile__ ( + asm volatile ( "mov r6, #32\n\t" - "add r6, %[a]\n\t" + "add r6, r6, %[a]\n\t" "mov r8, %[r]\n\t" "mov r9, r6\n\t" "mov r3, #0\n\t" "mov r4, #0\n\t" - "1:\n\t" + "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" - "# A[] * B\n\t" + /* A[] * B */ "ldr r6, [%[a]]\n\t" "umull r6, r7, r6, %[b]\n\t" - "adds r3, r6\n\t" - "adcs r4, r7\n\t" - "adc r5, %[r]\n\t" - "# A[] * B - Done\n\t" + "adds r3, r3, r6\n\t" + "adcs r4, r4, r7\n\t" + "adc r5, r5, %[r]\n\t" + /* A[] * B - Done */ "mov %[r], r8\n\t" "str r3, [%[r]]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add %[r], #4\n\t" - "add %[a], #4\n\t" + "add %[r], %[r], #4\n\t" + "add %[a], %[a], #4\n\t" "mov r8, %[r]\n\t" "cmp %[a], r9\n\t" "blt 1b\n\t" @@ -15836,37 +15849,37 @@ SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, { sp_digit r = 0; - __asm__ __volatile__ ( - "lsr r6, %[div], 16\n\t\n\t" - "add r6, r6, 
1\n\t\n\t" - "udiv r4, %[d1], r6\n\t\n\t" - "lsl r7, r4, 16\n\t\n\t" - "umull r4, r5, %[div], r7\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r5, %[d1], r6\n\t\n\t" - "lsl r4, r5, 16\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "lsl r4, %[d1], 16\n\t\n\t" - "orr r4, r4, %[d0], lsr 16\n\t\n\t" - "udiv r4, r4, r6\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "umull r4, r5, %[div], r4\n\t\n\t" - "subs %[d0], %[d0], r4\n\t\n\t" - "sbc %[d1], %[d1], r5\n\t\n\t" - "udiv r4, %[d0], %[div]\n\t\n\t" - "add r7, r7, r4\n\t\n\t" - "mov %[r], r7\n\t\n\t" + asm volatile ( + "lsr r6, %[div], #16\n\t" + "add r6, r6, #1\n\t" + "udiv r4, %[d1], r6\n\t" + "lsl r7, r4, #16\n\t" + "umull r4, r5, %[div], r7\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r5, %[d1], r6\n\t" + "lsl r4, r5, #16\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr 16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "lsl r4, %[d1], #16\n\t" + "orr r4, r4, %[d0], lsr 16\n\t" + "udiv r4, r4, r6\n\t" + "add r7, r7, r4\n\t" + "umull r4, r5, %[div], r4\n\t" + "subs %[d0], %[d0], r4\n\t" + "sbc %[d1], %[d1], r5\n\t" + "udiv r4, %[d0], %[div]\n\t" + "add r7, r7, r4\n\t" + "mov %[r], r7\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) : "r4", "r5", "r6", "r7" @@ -16152,7 +16165,8 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, mp_int* priv, err = sp_ecc_point_new(heap, p, point); #if 
defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - d = XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap, DYNAMIC_TYPE_ECC); + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap, + DYNAMIC_TYPE_ECC); if (d != NULL) { e = d + 0 * 8; x = d + 2 * 8; @@ -16306,7 +16320,8 @@ int sp_ecc_verify_256(const byte* hash, word32 hashLen, mp_int* pX, err = sp_ecc_point_new(heap, p2d, p2); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - d = XMALLOC(sizeof(sp_digit) * 16 * 8, heap, DYNAMIC_TYPE_ECC); + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap, + DYNAMIC_TYPE_ECC); if (d != NULL) { u1 = d + 0 * 8; u2 = d + 2 * 8; @@ -16421,7 +16436,8 @@ static int sp_256_ecc_is_point_8(sp_point* point, void* heap) int err = MP_OKAY; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) - d = XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC); + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, + DYNAMIC_TYPE_ECC); if (d != NULL) { t1 = d + 0 * 8; t2 = d + 2 * 8; @@ -16520,7 +16536,8 @@ int sp_ecc_check_key_256(mp_int* pX, mp_int* pY, mp_int* privm, void* heap) err = sp_ecc_point_new(heap, pd, p); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - priv = XMALLOC(sizeof(sp_digit) * 8, heap, DYNAMIC_TYPE_ECC); + priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap, + DYNAMIC_TYPE_ECC); if (priv == NULL) err = MEMORY_E; } @@ -16620,7 +16637,8 @@ int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_ecc_point_new(NULL, qd, q); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, DYNAMIC_TYPE_ECC); + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL, + DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; } @@ -16681,7 +16699,8 @@ int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ, err = sp_ecc_point_new(NULL, pd, p); #if 
defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL, DYNAMIC_TYPE_ECC); + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL, + DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; } @@ -16734,7 +16753,8 @@ int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ) err = sp_ecc_point_new(NULL, pd, p); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - tmp = XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL, DYNAMIC_TYPE_ECC); + tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL, + DYNAMIC_TYPE_ECC); if (tmp == NULL) err = MEMORY_E; } @@ -16784,7 +16804,7 @@ static int sp_256_mont_sqrt_8(sp_digit* y) int err = MP_OKAY; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) - d = XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); if (d != NULL) { t1 = d + 0 * 8; t2 = d + 2 * 8; @@ -16858,7 +16878,7 @@ int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym) int err = MP_OKAY; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) - d = XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC); if (d != NULL) { x = d + 0 * 8; y = d + 2 * 8;