From 34a462b3426774f72894657edb1904ec6fec98d3 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Tue, 7 Jan 2020 12:53:34 +1000 Subject: [PATCH] Don't use r7 with Cortex-M SP assembly r7 not available when compiling Cortex-M4 in debug. --- wolfcrypt/src/sp_cortexm.c | 4440 ++++++++++++++++++------------------ 1 file changed, 2199 insertions(+), 2241 deletions(-) diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 1c57daa70..a8679d977 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -1,6 +1,6 @@ /* sp.c * - * Copyright (C) 2006-2019 wolfSSL Inc. + * Copyright (C) 2006-2020 wolfSSL Inc. * * This file is part of wolfSSL. * @@ -224,475 +224,475 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( /* A[0] * B[0] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r3, r4, r6, r8\n\t" "mov r5, #0\n\t" "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * B[1] */ - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* A[1] * B[0] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * B[2] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[1] * B[1] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[2] * B[0] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * B[3] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[1] * B[2] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[2] * B[1] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[3] * B[0] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * B[4] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[1] * B[3] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[2] * B[2] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[3] * B[1] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[4] * B[0] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * B[5] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[1] * B[4] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[2] * B[3] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[3] * B[2] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[4] * B[1] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * B[0] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * B[6] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[1] * B[5] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[2] * B[4] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[3] * B[3] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[4] * B[2] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[5] * B[1] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * B[0] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * B[7] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[1] * B[6] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[2] * B[5] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[3] * B[4] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[4] * B[3] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[5] * B[2] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[6] * B[1] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[7] * B[0] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * B[7] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[2] * B[6] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[3] * B[5] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[4] * B[4] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * B[3] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[6] * B[2] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[7] * B[1] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * B[7] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[3] * B[6] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[4] * B[5] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[5] * B[4] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * B[3] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[7] * B[2] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * B[7] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[4] * B[6] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[5] * B[5] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[6] * B[4] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[7] * B[3] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * B[7] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * B[6] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[6] * B[5] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[7] * B[4] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * B[7] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * B[6] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[7] * B[5] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * B[7] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[7] * B[6] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * B[7] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" + "adc r3, r3, r8\n\t" "str r5, [%[r], #56]\n\t" "str r3, [%[r], #60]\n\t" /* Transfer tmp to r */ @@ -714,7 +714,7 @@ SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a, "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); } @@ -734,315 +734,315 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * A[1] */ - "ldr r7, [%[a], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * A[2] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" + "adc r3, r3, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[1] * A[1] */ "ldr r6, [%[a], #4]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * A[3] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #12]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[2] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #8]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r5, r5, r10\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * A[4] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[3] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #12]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[2] */ "ldr r6, [%[a], #8]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adc r3, r3, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * A[5] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[4] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[3] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r5, r5, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r10\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * A[6] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[5] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[4] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[3] * A[3] */ "ldr r6, [%[a], #12]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r5, r5, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * A[7] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[6] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[5] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[3] * A[4] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adc r3, r3, r10\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * A[7] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[2] * A[6] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[3] * A[5] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[4] * A[4] */ "ldr r6, [%[a], #16]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r5, r5, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * A[7] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[3] * A[6] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[4] * A[5] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r5, r5, r10\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * A[7] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[4] * A[6] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[5] * A[5] */ "ldr r6, [%[a], #20]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adc r3, r3, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * A[7] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * A[6] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * A[7] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * A[6] */ "ldr r6, [%[a], #24]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * A[7] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * A[7] */ "ldr r6, [%[a], #28]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" + "adc r3, r3, r8\n\t" "str r5, [%[r], #56]\n\t" "str r3, [%[r], #60]\n\t" /* Transfer tmp to r */ @@ -1064,7 +1064,7 @@ SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a) "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11" ); } @@ -1081,30 +1081,30 @@ SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -1184,50 +1184,50 @@ SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -1433,90 +1433,90 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -1806,170 +1806,170 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -2080,11 +2080,11 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "add r6, r6, #256\n\t" - "sub r7, r7, #1\n\t" + "sub r8, r8, #1\n\t" "\n1:\n\t" - "adds %[c], %[c], r7\n\t" + "adds %[c], %[c], r8\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" "adcs r4, r4, r5\n\t" @@ -2098,7 +2098,7 @@ SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -2116,8 +2116,8 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, { sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, r7, #256\n\t" + "mov r8, %[a]\n\t" + "add r8, r8, #256\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -2132,11 +2132,11 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "sbc %[c], %[c], %[c]\n\t" "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" - "cmp %[a], r7\n\t" + "cmp %[a], r8\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); return c; @@ -2157,63 +2157,63 @@ SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" - "mov r9, %[a]\n\t" - "mov r10, %[b]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, r6, r9\n\t" - "mov r12, r6\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #252\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov %[b], r8\n\t" + "mov %[b], r9\n\t" "sub %[b], %[b], %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add %[b], %[b], r10\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" "\n2:\n\t" /* Multiply Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" - "cmp %[a], r12\n\t" + "cmp %[a], r14\n\t" "beq 3f\n\t" - "mov r6, r8\n\t" - "add r6, r6, r9\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r11\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[a], r9\n\t" - "mov %[b], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -2230,47 +2230,47 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" "add sp, sp, r6\n\t" - "mov r10, sp\n\t" - "mov r9, %[a]\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #252\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov r2, r8\n\t" + "mov r2, r9\n\t" "sub r2, r2, %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add r2, r2, r9\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [r2]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Square: Done */ "\n5:\n\t" @@ -2278,33 +2278,33 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "sub r2, r2, #4\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" - "mov r7, r8\n\t" - "add r7, r7, r9\n\t" - "cmp %[a], r7\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r10\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "mov %[a], r9\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[r], r11\n\t" "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" "mov r3, #1\n\t" "lsl r3, r3, #8\n\t" "add r3, r3, #252\n\t" @@ -2318,7 +2318,7 @@ SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -2355,11 +2355,11 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "add r6, r6, #128\n\t" - "sub r7, r7, #1\n\t" + "sub r8, r8, #1\n\t" "\n1:\n\t" - "adds %[c], %[c], r7\n\t" + "adds %[c], %[c], r8\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" "adcs r4, r4, r5\n\t" @@ -2373,7 +2373,7 @@ SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -2391,8 +2391,8 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, { sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, r7, #128\n\t" + "mov r8, %[a]\n\t" + "add r8, r8, #128\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -2407,11 +2407,11 @@ SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "sbc %[c], %[c], %[c]\n\t" "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" - "cmp %[a], r7\n\t" + "cmp %[a], r8\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); return c; @@ -2432,60 +2432,60 @@ SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" - "mov r9, %[a]\n\t" - "mov r10, %[b]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" "mov r6, #128\n\t" - "add r6, r6, r9\n\t" - "mov r12, r6\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #124\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov %[b], r8\n\t" + "mov %[b], r9\n\t" "sub %[b], %[b], %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add %[b], %[b], r10\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" "\n2:\n\t" /* Multiply Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" - "cmp %[a], r12\n\t" + "cmp %[a], r14\n\t" "beq 3f\n\t" - "mov r6, r8\n\t" - "add r6, r6, r9\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r11\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[a], r9\n\t" - "mov %[b], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -2502,78 +2502,78 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" "add sp, sp, r6\n\t" - "mov r10, sp\n\t" - "mov r9, %[a]\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #124\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov r2, r8\n\t" + "mov r2, r9\n\t" "sub r2, r2, %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add r2, r2, r9\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [r2]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Square: Done */ "\n5:\n\t" "add %[a], %[a], #4\n\t" "sub r2, r2, #4\n\t" "mov r6, #128\n\t" - "add r6, r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" - "mov r7, r8\n\t" - "add r7, r7, r9\n\t" - "cmp %[a], r7\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r10\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "mov %[a], r9\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[r], r11\n\t" "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" "mov r3, #252\n\t" "\n4:\n\t" "ldr r6, [%[a], r3]\n\t" @@ -2585,7 +2585,7 @@ SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -2621,7 +2621,7 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "add r8, %[a], #256\n\t" + "add r9, %[a], #256\n\t" /* A[0] * B */ "ldr r6, [%[a]], #4\n\t" "umull r5, r3, r6, %[b]\n\t" @@ -2632,20 +2632,20 @@ SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, "mov r5, #0\n\t" /* A[] * B */ "ldr r6, [%[a]], #4\n\t" - "umull r6, r7, r6, %[b]\n\t" + "umull r6, r8, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[] * B - Done */ "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "cmp %[a], r8\n\t" + "cmp %[a], r9\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" ); } @@ -2679,23 +2679,23 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r5, #128\n\t" - "mov r8, r5\n\t" - "mov r7, #0\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" "\n1:\n\t" - "ldr r6, [%[b], r7]\n\t" + "ldr r6, [%[b], r8]\n\t" "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" - "ldr r5, [%[a], r7]\n\t" + "ldr r5, [%[a], r8]\n\t" "sbcs r5, r5, r6\n\t" "sbcs %[c], %[c], %[c]\n\t" - "str r5, [%[r], r7]\n\t" - "add r7, r7, #4\n\t" - "cmp r7, r8\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + : "memory", "r5", "r6", "r8", "r9" ); return c; @@ -2713,28 +2713,27 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, sp_digit ca = 0; __asm__ __volatile__ ( - "mov r8, %[mp]\n\t" + "mov r9, %[mp]\n\t" "mov r12, %[m]\n\t" - "mov r9, %[a]\n\t" + "mov r10, %[a]\n\t" "mov r4, #0\n\t" - "add r11, r9, #128\n\t" + "add r11, r10, #128\n\t" "\n1:\n\t" /* mu = a[i] * mp */ - "mov %[mp], r8\n\t" - "ldr %[a], [r9]\n\t" + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r12\n\t" - "mov r10, r9\n\t" - "add r14, r9, #120\n\t" + "add r14, r10, #120\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -2743,10 +2742,10 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r4, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r4, r4, r7\n\t" + "adc r4, r4, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r5, r5, %[a]\n\t" "adc r4, r4, #0\n\t" @@ -2757,10 +2756,10 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -2769,28 +2768,28 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" /* Multiply m[31] and mu - Start */ - "ldr r7, [%[m]]\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc %[ca], %[ca], #0\n\t" /* Multiply m[31] and mu - Done */ "ldr r6, [r10]\n\t" - "ldr r7, [r10, #4]\n\t" + "ldr r8, [r10, #4]\n\t" "adds r6, r6, r5\n\t" - "adcs r7, r7, r4\n\t" + "adcs r8, r8, r4\n\t" "adc %[ca], %[ca], #0\n\t" "str r6, [r10]\n\t" - "str r7, [r10, #4]\n\t" + "str r8, [r10, #4]\n\t" /* Next word in a */ - "add r9, r9, #4\n\t" - "cmp r9, r11\n\t" + "sub r10, r10, #120\n\t" + "cmp r10, r11\n\t" "blt 1b\n\t" - "mov %[a], r9\n\t" + "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); @@ -2836,7 +2835,7 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "add r8, %[a], #128\n\t" + "add r9, %[a], #128\n\t" /* A[0] * B */ "ldr r6, [%[a]], #4\n\t" "umull r5, r3, r6, %[b]\n\t" @@ -2847,20 +2846,20 @@ SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, "mov r5, #0\n\t" /* A[] * B */ "ldr r6, [%[a]], #4\n\t" - "umull r6, r7, r6, %[b]\n\t" + "umull r6, r8, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[] * B - Done */ "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "cmp %[a], r8\n\t" + "cmp %[a], r9\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" ); } @@ -2882,36 +2881,36 @@ SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0, "lsr r6, %[div], #16\n\t" "add r6, r6, #1\n\t" "udiv r4, %[d1], r6\n\t" - "lsl r7, r4, #16\n\t" - "umull r4, r5, %[div], r7\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r5, %[d1], r6\n\t" "lsl r4, r5, #16\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r4, %[d0], %[div]\n\t" - "add r7, r7, r4\n\t" - "mov %[r], r7\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7" + : "r4", "r5", "r6", "r8" ); return r; } @@ -2933,27 +2932,27 @@ SP_NOINLINE static int32_t sp_2048_cmp_32(const sp_digit* a, const sp_digit* b) "mvn r3, r3\n\t" "mov r6, #124\n\t" "\n1:\n\t" - "ldr r7, [%[a], r6]\n\t" + "ldr r8, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r7, r3\n\t" + "and r8, r8, r3\n\t" "and r5, r5, r3\n\t" - "mov r4, r7\n\t" - "subs r7, r7, r5\n\t" - "sbc r7, r7, r7\n\t" - "add %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "subs r5, r5, r4\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + : "r3", "r4", "r5", "r6", "r8" ); return r; @@ -3326,23 +3325,23 @@ SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r5, #1\n\t" "lsl r5, r5, #8\n\t" - "mov r8, r5\n\t" - "mov r7, #0\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" "\n1:\n\t" - "ldr r6, [%[b], r7]\n\t" + "ldr r6, [%[b], r8]\n\t" "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" - "ldr r5, [%[a], r7]\n\t" + "ldr r5, [%[a], r8]\n\t" "sbcs r5, r5, r6\n\t" "sbcs %[c], %[c], %[c]\n\t" - "str r5, [%[r], r7]\n\t" - "add r7, r7, #4\n\t" - "cmp r7, r8\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + : "memory", "r5", "r6", "r8", "r9" ); return c; @@ -3360,28 +3359,27 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, sp_digit ca = 0; __asm__ __volatile__ ( - "mov r8, %[mp]\n\t" + "mov r9, %[mp]\n\t" "mov r12, %[m]\n\t" - "mov r9, %[a]\n\t" + "mov r10, %[a]\n\t" "mov r4, #0\n\t" - "add r11, r9, #256\n\t" + "add r11, r10, #256\n\t" "\n1:\n\t" /* mu = a[i] * mp */ - "mov %[mp], r8\n\t" - "ldr %[a], [r9]\n\t" + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r12\n\t" - "mov r10, r9\n\t" - "add r14, r9, #248\n\t" + "add r14, r10, #248\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -3390,10 +3388,10 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r4, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r4, r4, r7\n\t" + "adc r4, r4, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r5, r5, %[a]\n\t" "adc r4, r4, #0\n\t" @@ -3404,10 +3402,10 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -3416,28 +3414,28 @@ SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m, "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" /* Multiply m[63] and mu - Start */ - "ldr r7, [%[m]]\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc %[ca], %[ca], #0\n\t" /* Multiply m[63] and mu - Done */ "ldr r6, [r10]\n\t" - "ldr r7, [r10, #4]\n\t" + "ldr r8, [r10, #4]\n\t" "adds r6, r6, r5\n\t" - "adcs r7, r7, r4\n\t" + "adcs r8, r8, r4\n\t" "adc %[ca], %[ca], #0\n\t" "str r6, [r10]\n\t" - "str r7, [r10, #4]\n\t" + "str r8, [r10, #4]\n\t" /* Next word in a */ - "add r9, r9, #4\n\t" - "cmp r9, r11\n\t" + "sub r10, r10, #248\n\t" + "cmp r10, r11\n\t" "blt 1b\n\t" - "mov %[a], r9\n\t" + "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca); @@ -3491,36 +3489,36 @@ SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0, "lsr r6, %[div], #16\n\t" "add r6, r6, #1\n\t" "udiv r4, %[d1], r6\n\t" - "lsl r7, r4, #16\n\t" - "umull r4, r5, %[div], r7\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r5, %[d1], r6\n\t" "lsl r4, r5, #16\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r4, %[d0], %[div]\n\t" - "add r7, r7, r4\n\t" - "mov %[r], r7\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7" + : "r4", "r5", "r6", "r8" ); return r; } @@ -3572,27 +3570,27 @@ SP_NOINLINE static int32_t sp_2048_cmp_64(const sp_digit* a, const sp_digit* b) "mvn r3, r3\n\t" "mov r6, #252\n\t" "\n1:\n\t" - "ldr r7, [%[a], r6]\n\t" + "ldr r8, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r7, r3\n\t" + "and r8, r8, r3\n\t" "and r5, r5, r3\n\t" - "mov r4, r7\n\t" - "subs r7, r7, r5\n\t" - "sbc r7, r7, r7\n\t" - "add %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "subs r5, r5, r4\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + : "r3", "r4", "r5", "r6", "r8" ); return r; @@ -5156,60 +5154,60 @@ SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" - "mov r9, %[a]\n\t" - "mov r10, %[b]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" "mov r6, #48\n\t" - "add r6, r6, r9\n\t" - "mov r12, r6\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #44\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov %[b], r8\n\t" + "mov %[b], r9\n\t" "sub %[b], %[b], %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add %[b], %[b], r10\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" "\n2:\n\t" /* Multiply Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" - "cmp %[a], r12\n\t" + "cmp %[a], r14\n\t" "beq 3f\n\t" - "mov r6, r8\n\t" - "add r6, r6, r9\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r11\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #88\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[a], r9\n\t" - "mov %[b], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -5226,77 +5224,77 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" "mov r6, #96\n\t" "neg r6, r6\n\t" "add sp, sp, r6\n\t" - "mov r10, sp\n\t" - "mov r9, %[a]\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #44\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov r2, r8\n\t" + "mov r2, r9\n\t" "sub r2, r2, %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add r2, r2, r9\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [r2]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Square: Done */ "\n5:\n\t" "add %[a], %[a], #4\n\t" "sub r2, r2, #4\n\t" "mov r6, #48\n\t" - "add r6, r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" - "mov r7, r8\n\t" - "add r7, r7, r9\n\t" - "cmp %[a], r7\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r10\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #88\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "mov %[a], r9\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[r], r11\n\t" "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" "mov r3, #92\n\t" "\n4:\n\t" "ldr r6, [%[a], r3]\n\t" @@ -5307,7 +5305,7 @@ SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -5324,40 +5322,40 @@ SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -5457,70 +5455,70 @@ SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -5770,130 +5768,130 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -6263,250 +6261,250 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -6617,11 +6615,11 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "add r6, r6, #384\n\t" - "sub r7, r7, #1\n\t" + "sub r8, r8, #1\n\t" "\n1:\n\t" - "adds %[c], %[c], r7\n\t" + "adds %[c], %[c], r8\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" "adcs r4, r4, r5\n\t" @@ -6635,7 +6633,7 @@ SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -6653,8 +6651,8 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, { sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, r7, #384\n\t" + "mov r8, %[a]\n\t" + "add r8, r8, #384\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -6669,11 +6667,11 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "sbc %[c], %[c], %[c]\n\t" "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" - "cmp %[a], r7\n\t" + "cmp %[a], r8\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); return c; @@ -6694,66 +6692,66 @@ SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" - "mov r9, %[a]\n\t" - "mov r10, %[b]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #128\n\t" - "add r6, r6, r9\n\t" - "mov r12, r6\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #124\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov %[b], r8\n\t" + "mov %[b], r9\n\t" "sub %[b], %[b], %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add %[b], %[b], r10\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" "\n2:\n\t" /* Multiply Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" - "cmp %[a], r12\n\t" + "cmp %[a], r14\n\t" "beq 3f\n\t" - "mov r6, r8\n\t" - "add r6, r6, r9\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r11\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[a], r9\n\t" - "mov %[b], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -6770,49 +6768,49 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" "mov r6, #3\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" "add sp, sp, r6\n\t" - "mov r10, sp\n\t" - "mov r9, %[a]\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #124\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov r2, r8\n\t" + "mov r2, r9\n\t" "sub r2, r2, %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add r2, r2, r9\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [r2]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Square: Done */ "\n5:\n\t" @@ -6821,33 +6819,33 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #128\n\t" - "add r6, r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" - "mov r7, r8\n\t" - "add r7, r7, r9\n\t" - "cmp %[a], r7\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r10\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "mov %[a], r9\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[r], r11\n\t" "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" "mov r3, #2\n\t" "lsl r3, r3, #8\n\t" "add r3, r3, #252\n\t" @@ -6861,7 +6859,7 @@ SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -6898,11 +6896,11 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "add r6, r6, #192\n\t" - "sub r7, r7, #1\n\t" + "sub r8, r8, #1\n\t" "\n1:\n\t" - "adds %[c], %[c], r7\n\t" + "adds %[c], %[c], r8\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" "adcs r4, r4, r5\n\t" @@ -6916,7 +6914,7 @@ SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -6934,8 +6932,8 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, { sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, r7, #192\n\t" + "mov r8, %[a]\n\t" + "add r8, r8, #192\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -6950,11 +6948,11 @@ SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "sbc %[c], %[c], %[c]\n\t" "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" - "cmp %[a], r7\n\t" + "cmp %[a], r8\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); return c; @@ -6975,62 +6973,62 @@ SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" - "mov r9, %[a]\n\t" - "mov r10, %[b]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" "mov r6, #192\n\t" - "add r6, r6, r9\n\t" - "mov r12, r6\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #188\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov %[b], r8\n\t" + "mov %[b], r9\n\t" "sub %[b], %[b], %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add %[b], %[b], r10\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" "\n2:\n\t" /* Multiply Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" - "cmp %[a], r12\n\t" + "cmp %[a], r14\n\t" "beq 3f\n\t" - "mov r6, r8\n\t" - "add r6, r6, r9\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r11\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #120\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[a], r9\n\t" - "mov %[b], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -7047,81 +7045,81 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #128\n\t" "neg r6, r6\n\t" "add sp, sp, r6\n\t" - "mov r10, sp\n\t" - "mov r9, %[a]\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #188\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov r2, r8\n\t" + "mov r2, r9\n\t" "sub r2, r2, %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add r2, r2, r9\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [r2]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Square: Done */ "\n5:\n\t" "add %[a], %[a], #4\n\t" "sub r2, r2, #4\n\t" "mov r6, #192\n\t" - "add r6, r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" - "mov r7, r8\n\t" - "add r7, r7, r9\n\t" - "cmp %[a], r7\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r10\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #120\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "mov %[a], r9\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[r], r11\n\t" "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" "mov r3, #1\n\t" "lsl r3, r3, #8\n\t" "add r3, r3, #124\n\t" @@ -7136,7 +7134,7 @@ SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -7172,7 +7170,7 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "add r8, %[a], #384\n\t" + "add r9, %[a], #384\n\t" /* A[0] * B */ "ldr r6, [%[a]], #4\n\t" "umull r5, r3, r6, %[b]\n\t" @@ -7183,20 +7181,20 @@ SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, "mov r5, #0\n\t" /* A[] * B */ "ldr r6, [%[a]], #4\n\t" - "umull r6, r7, r6, %[b]\n\t" + "umull r6, r8, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[] * B - Done */ "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "cmp %[a], r8\n\t" + "cmp %[a], r9\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" ); } @@ -7230,23 +7228,23 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r5, #192\n\t" - "mov r8, r5\n\t" - "mov r7, #0\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" "\n1:\n\t" - "ldr r6, [%[b], r7]\n\t" + "ldr r6, [%[b], r8]\n\t" "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" - "ldr r5, [%[a], r7]\n\t" + "ldr r5, [%[a], r8]\n\t" "sbcs r5, r5, r6\n\t" "sbcs %[c], %[c], %[c]\n\t" - "str r5, [%[r], r7]\n\t" - "add r7, r7, #4\n\t" - "cmp r7, r8\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + : "memory", "r5", "r6", "r8", "r9" ); return c; @@ -7264,28 +7262,27 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, sp_digit ca = 0; __asm__ __volatile__ ( - "mov r8, %[mp]\n\t" + "mov r9, %[mp]\n\t" "mov r12, %[m]\n\t" - "mov r9, %[a]\n\t" + "mov r10, %[a]\n\t" "mov r4, #0\n\t" - "add r11, r9, #192\n\t" + "add r11, r10, #192\n\t" "\n1:\n\t" /* mu = a[i] * mp */ - "mov %[mp], r8\n\t" - "ldr %[a], [r9]\n\t" + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r12\n\t" - "mov r10, r9\n\t" - "add r14, r9, #184\n\t" + "add r14, r10, #184\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -7294,10 +7291,10 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r4, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r4, r4, r7\n\t" + "adc r4, r4, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r5, r5, %[a]\n\t" "adc r4, r4, #0\n\t" @@ -7308,10 +7305,10 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -7320,28 +7317,28 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" /* Multiply m[47] and mu - Start */ - "ldr r7, [%[m]]\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc %[ca], %[ca], #0\n\t" /* Multiply m[47] and mu - Done */ "ldr r6, [r10]\n\t" - "ldr r7, [r10, #4]\n\t" + "ldr r8, [r10, #4]\n\t" "adds r6, r6, r5\n\t" - "adcs r7, r7, r4\n\t" + "adcs r8, r8, r4\n\t" "adc %[ca], %[ca], #0\n\t" "str r6, [r10]\n\t" - "str r7, [r10, #4]\n\t" + "str r8, [r10, #4]\n\t" /* Next word in a */ - "add r9, r9, #4\n\t" - "cmp r9, r11\n\t" + "sub r10, r10, #184\n\t" + "cmp r10, r11\n\t" "blt 1b\n\t" - "mov %[a], r9\n\t" + "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca); @@ -7387,7 +7384,7 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "add r8, %[a], #192\n\t" + "add r9, %[a], #192\n\t" /* A[0] * B */ "ldr r6, [%[a]], #4\n\t" "umull r5, r3, r6, %[b]\n\t" @@ -7398,20 +7395,20 @@ SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, "mov r5, #0\n\t" /* A[] * B */ "ldr r6, [%[a]], #4\n\t" - "umull r6, r7, r6, %[b]\n\t" + "umull r6, r8, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[] * B - Done */ "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "cmp %[a], r8\n\t" + "cmp %[a], r9\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" ); } @@ -7433,36 +7430,36 @@ SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0, "lsr r6, %[div], #16\n\t" "add r6, r6, #1\n\t" "udiv r4, %[d1], r6\n\t" - "lsl r7, r4, #16\n\t" - "umull r4, r5, %[div], r7\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r5, %[d1], r6\n\t" "lsl r4, r5, #16\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r4, %[d0], %[div]\n\t" - "add r7, r7, r4\n\t" - "mov %[r], r7\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7" + : "r4", "r5", "r6", "r8" ); return r; } @@ -7484,27 +7481,27 @@ SP_NOINLINE static int32_t sp_3072_cmp_48(const sp_digit* a, const sp_digit* b) "mvn r3, r3\n\t" "mov r6, #188\n\t" "\n1:\n\t" - "ldr r7, [%[a], r6]\n\t" + "ldr r8, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r7, r3\n\t" + "and r8, r8, r3\n\t" "and r5, r5, r3\n\t" - "mov r4, r7\n\t" - "subs r7, r7, r5\n\t" - "sbc r7, r7, r7\n\t" - "add %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "subs r5, r5, r4\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + : "r3", "r4", "r5", "r6", "r8" ); return r; @@ -7878,23 +7875,23 @@ SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, "mov r5, #1\n\t" "lsl r5, r5, #8\n\t" "add r5, r5, #128\n\t" - "mov r8, r5\n\t" - "mov r7, #0\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" "\n1:\n\t" - "ldr r6, [%[b], r7]\n\t" + "ldr r6, [%[b], r8]\n\t" "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" - "ldr r5, [%[a], r7]\n\t" + "ldr r5, [%[a], r8]\n\t" "sbcs r5, r5, r6\n\t" "sbcs %[c], %[c], %[c]\n\t" - "str r5, [%[r], r7]\n\t" - "add r7, r7, #4\n\t" - "cmp r7, r8\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + : "memory", "r5", "r6", "r8", "r9" ); return c; @@ -7912,28 +7909,27 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, sp_digit ca = 0; __asm__ __volatile__ ( - "mov r8, %[mp]\n\t" + "mov r9, %[mp]\n\t" "mov r12, %[m]\n\t" - "mov r9, %[a]\n\t" + "mov r10, %[a]\n\t" "mov r4, #0\n\t" - "add r11, r9, #384\n\t" + "add r11, r10, #384\n\t" "\n1:\n\t" /* mu = a[i] * mp */ - "mov %[mp], r8\n\t" - "ldr %[a], [r9]\n\t" + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r12\n\t" - "mov r10, r9\n\t" - "add r14, r9, #376\n\t" + "add r14, r10, #376\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -7942,10 +7938,10 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r4, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r4, r4, r7\n\t" + "adc r4, r4, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r5, r5, %[a]\n\t" "adc r4, r4, #0\n\t" @@ -7956,10 +7952,10 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -7968,28 +7964,28 @@ SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m, "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" /* Multiply m[95] and mu - Start */ - "ldr r7, [%[m]]\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc %[ca], %[ca], #0\n\t" /* Multiply m[95] and mu - Done */ "ldr r6, [r10]\n\t" - "ldr r7, [r10, #4]\n\t" + "ldr r8, [r10, #4]\n\t" "adds r6, r6, r5\n\t" - "adcs r7, r7, r4\n\t" + "adcs r8, r8, r4\n\t" "adc %[ca], %[ca], #0\n\t" "str r6, [r10]\n\t" - "str r7, [r10, #4]\n\t" + "str r8, [r10, #4]\n\t" /* Next word in a */ - "add r9, r9, #4\n\t" - "cmp r9, r11\n\t" + "sub r10, r10, #376\n\t" + "cmp r10, r11\n\t" "blt 1b\n\t" - "mov %[a], r9\n\t" + "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca); @@ -8043,36 +8039,36 @@ SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0, "lsr r6, %[div], #16\n\t" "add r6, r6, #1\n\t" "udiv r4, %[d1], r6\n\t" - "lsl r7, r4, #16\n\t" - "umull r4, r5, %[div], r7\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r5, %[d1], r6\n\t" "lsl r4, r5, #16\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r4, %[d0], %[div]\n\t" - "add r7, r7, r4\n\t" - "mov %[r], r7\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7" + : "r4", "r5", "r6", "r8" ); return r; } @@ -8126,27 +8122,27 @@ SP_NOINLINE static int32_t sp_3072_cmp_96(const sp_digit* a, const sp_digit* b) "lsl r6, r6, #8\n\t" "add r6, r6, #124\n\t" "\n1:\n\t" - "ldr r7, [%[a], r6]\n\t" + "ldr r8, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r7, r3\n\t" + "and r8, r8, r3\n\t" "and r5, r5, r3\n\t" - "mov r4, r7\n\t" - "subs r7, r7, r5\n\t" - "sbc r7, r7, r7\n\t" - "add %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "subs r5, r5, r4\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + : "r3", "r4", "r5", "r6", "r8" ); return r; @@ -9906,170 +9902,170 @@ SP_NOINLINE static sp_digit sp_4096_add_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -10429,330 +10425,330 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -10771,63 +10767,63 @@ SP_NOINLINE static void sp_4096_mul_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" - "mov r9, %[a]\n\t" - "mov r10, %[b]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, r6, r9\n\t" - "mov r12, r6\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #252\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov %[b], r8\n\t" + "mov %[b], r9\n\t" "sub %[b], %[b], %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add %[b], %[b], r10\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" "\n2:\n\t" /* Multiply Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" - "cmp %[a], r12\n\t" + "cmp %[a], r14\n\t" "beq 3f\n\t" - "mov r6, r8\n\t" - "add r6, r6, r9\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r11\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[a], r9\n\t" - "mov %[b], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -10907,47 +10903,47 @@ SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" "add sp, sp, r6\n\t" - "mov r10, sp\n\t" - "mov r9, %[a]\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #252\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov r2, r8\n\t" + "mov r2, r9\n\t" "sub r2, r2, %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add r2, r2, r9\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [r2]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Square: Done */ "\n5:\n\t" @@ -10955,33 +10951,33 @@ SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) "sub r2, r2, #4\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" - "add r6, r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" - "mov r7, r8\n\t" - "add r7, r7, r9\n\t" - "cmp %[a], r7\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r10\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "mov %[a], r9\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[r], r11\n\t" "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" "mov r3, #1\n\t" "lsl r3, r3, #8\n\t" "add r3, r3, #252\n\t" @@ -10995,7 +10991,7 @@ SP_NOINLINE static void sp_4096_sqr_64(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -11041,11 +11037,11 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "add r6, r6, #512\n\t" - "sub r7, r7, #1\n\t" + "sub r8, r8, #1\n\t" "\n1:\n\t" - "adds %[c], %[c], r7\n\t" + "adds %[c], %[c], r8\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" "adcs r4, r4, r5\n\t" @@ -11059,7 +11055,7 @@ SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -11077,8 +11073,8 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, { sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, r7, #512\n\t" + "mov r8, %[a]\n\t" + "add r8, r8, #512\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -11093,11 +11089,11 @@ SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "sbc %[c], %[c], %[c]\n\t" "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" - "cmp %[a], r7\n\t" + "cmp %[a], r8\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); return c; @@ -11118,65 +11114,65 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r3, #0\n\t" "mov r4, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" - "mov r9, %[a]\n\t" - "mov r10, %[b]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" + "mov r10, %[a]\n\t" + "mov r11, %[b]\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" - "add r6, r6, r9\n\t" - "mov r12, r6\n\t" + "add r6, r6, r10\n\t" + "mov r14, r6\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r5, #0\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #252\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov %[b], r8\n\t" + "mov %[b], r9\n\t" "sub %[b], %[b], %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add %[b], %[b], r10\n\t" + "add %[a], %[a], r10\n\t" + "add %[b], %[b], r11\n\t" "\n2:\n\t" /* Multiply Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b]]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply Done */ "add %[a], %[a], #4\n\t" "sub %[b], %[b], #4\n\t" - "cmp %[a], r12\n\t" + "cmp %[a], r14\n\t" "beq 3f\n\t" - "mov r6, r8\n\t" - "add r6, r6, r9\n\t" + "mov r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r11\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r12\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #3\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[a], r9\n\t" - "mov %[b], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[a], r10\n\t" + "mov %[b], r11\n\t" : : [r] "r" (tmp), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); XMEMCPY(r, tmp, sizeof(tmp)); @@ -11193,49 +11189,49 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "mov r3, #0\n\t" "mov r4, #0\n\t" "mov r5, #0\n\t" - "mov r8, r3\n\t" - "mov r11, %[r]\n\t" + "mov r9, r3\n\t" + "mov r12, %[r]\n\t" "mov r6, #4\n\t" "lsl r6, r6, #8\n\t" "neg r6, r6\n\t" "add sp, sp, r6\n\t" - "mov r10, sp\n\t" - "mov r9, %[a]\n\t" + "mov r11, sp\n\t" + "mov r10, %[a]\n\t" "\n1:\n\t" "mov %[r], #0\n\t" "mov r6, #1\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #252\n\t" - "mov %[a], r8\n\t" + "mov %[a], r9\n\t" "subs %[a], %[a], r6\n\t" "sbc r6, r6, r6\n\t" "mvn r6, r6\n\t" "and %[a], %[a], r6\n\t" - "mov r2, r8\n\t" + "mov r2, r9\n\t" "sub r2, r2, %[a]\n\t" - "add %[a], %[a], r9\n\t" - "add r2, r2, r9\n\t" + "add %[a], %[a], r10\n\t" + "add r2, r2, r10\n\t" "\n2:\n\t" "cmp r2, %[a]\n\t" "beq 4f\n\t" /* Multiply * 2: Start */ "ldr r6, [%[a]]\n\t" - "ldr r7, [r2]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [r2]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Multiply * 2: Done */ "bal 5f\n\t" "\n4:\n\t" /* Square: Start */ "ldr r6, [%[a]]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, %[r]\n\t" /* Square: Done */ "\n5:\n\t" @@ -11243,33 +11239,33 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "sub r2, r2, #4\n\t" "mov r6, #2\n\t" "lsl r6, r6, #8\n\t" - "add r6, r6, r9\n\t" + "add r6, r6, r10\n\t" "cmp %[a], r6\n\t" "beq 3f\n\t" "cmp %[a], r2\n\t" "bgt 3f\n\t" - "mov r7, r8\n\t" - "add r7, r7, r9\n\t" - "cmp %[a], r7\n\t" + "mov r8, r9\n\t" + "add r8, r8, r10\n\t" + "cmp %[a], r8\n\t" "ble 2b\n\t" "\n3:\n\t" - "mov %[r], r10\n\t" - "mov r7, r8\n\t" - "str r3, [%[r], r7]\n\t" + "mov %[r], r11\n\t" + "mov r8, r9\n\t" + "str r3, [%[r], r8]\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" "mov r5, #0\n\t" - "add r7, r7, #4\n\t" - "mov r8, r7\n\t" + "add r8, r8, #4\n\t" + "mov r9, r8\n\t" "mov r6, #3\n\t" "lsl r6, r6, #8\n\t" "add r6, r6, #248\n\t" - "cmp r7, r6\n\t" + "cmp r8, r6\n\t" "ble 1b\n\t" - "mov %[a], r9\n\t" - "str r3, [%[r], r7]\n\t" - "mov %[r], r11\n\t" "mov %[a], r10\n\t" + "str r3, [%[r], r8]\n\t" + "mov %[r], r12\n\t" + "mov %[a], r11\n\t" "mov r3, #3\n\t" "lsl r3, r3, #8\n\t" "add r3, r3, #252\n\t" @@ -11283,7 +11279,7 @@ SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "add sp, sp, r6\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -11317,7 +11313,7 @@ SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "add r8, %[a], #512\n\t" + "add r9, %[a], #512\n\t" /* A[0] * B */ "ldr r6, [%[a]], #4\n\t" "umull r5, r3, r6, %[b]\n\t" @@ -11328,20 +11324,20 @@ SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, "mov r5, #0\n\t" /* A[] * B */ "ldr r6, [%[a]], #4\n\t" - "umull r6, r7, r6, %[b]\n\t" + "umull r6, r8, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[] * B - Done */ "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "cmp %[a], r8\n\t" + "cmp %[a], r9\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" ); } @@ -11377,23 +11373,23 @@ SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r5, #2\n\t" "lsl r5, r5, #8\n\t" - "mov r8, r5\n\t" - "mov r7, #0\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" "\n1:\n\t" - "ldr r6, [%[b], r7]\n\t" + "ldr r6, [%[b], r8]\n\t" "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" - "ldr r5, [%[a], r7]\n\t" + "ldr r5, [%[a], r8]\n\t" "sbcs r5, r5, r6\n\t" "sbcs %[c], %[c], %[c]\n\t" - "str r5, [%[r], r7]\n\t" - "add r7, r7, #4\n\t" - "cmp r7, r8\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + : "memory", "r5", "r6", "r8", "r9" ); return c; @@ -11411,28 +11407,27 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, sp_digit ca = 0; __asm__ __volatile__ ( - "mov r8, %[mp]\n\t" + "mov r9, %[mp]\n\t" "mov r12, %[m]\n\t" - "mov r9, %[a]\n\t" + "mov r10, %[a]\n\t" "mov r4, #0\n\t" - "add r11, r9, #512\n\t" + "add r11, r10, #512\n\t" "\n1:\n\t" /* mu = a[i] * mp */ - "mov %[mp], r8\n\t" - "ldr %[a], [r9]\n\t" + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r12\n\t" - "mov r10, r9\n\t" - "add r14, r9, #504\n\t" + "add r14, r10, #504\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -11441,10 +11436,10 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r4, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r4, r4, r7\n\t" + "adc r4, r4, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r5, r5, %[a]\n\t" "adc r4, r4, #0\n\t" @@ -11455,10 +11450,10 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -11467,28 +11462,28 @@ SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m, "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" /* Multiply m[127] and mu - Start */ - "ldr r7, [%[m]]\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc %[ca], %[ca], #0\n\t" /* Multiply m[127] and mu - Done */ "ldr r6, [r10]\n\t" - "ldr r7, [r10, #4]\n\t" + "ldr r8, [r10, #4]\n\t" "adds r6, r6, r5\n\t" - "adcs r7, r7, r4\n\t" + "adcs r8, r8, r4\n\t" "adc %[ca], %[ca], #0\n\t" "str r6, [r10]\n\t" - "str r7, [r10, #4]\n\t" + "str r8, [r10, #4]\n\t" /* Next word in a */ - "add r9, r9, #4\n\t" - "cmp r9, r11\n\t" + "sub r10, r10, #504\n\t" + "cmp r10, r11\n\t" "blt 1b\n\t" - "mov %[a], r9\n\t" + "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca); @@ -11542,36 +11537,36 @@ SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0, "lsr r6, %[div], #16\n\t" "add r6, r6, #1\n\t" "udiv r4, %[d1], r6\n\t" - "lsl r7, r4, #16\n\t" - "umull r4, r5, %[div], r7\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r5, %[d1], r6\n\t" "lsl r4, r5, #16\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r4, %[d0], %[div]\n\t" - "add r7, r7, r4\n\t" - "mov %[r], r7\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7" + : "r4", "r5", "r6", "r8" ); return r; } @@ -11625,27 +11620,27 @@ SP_NOINLINE static int32_t sp_4096_cmp_128(const sp_digit* a, const sp_digit* b) "lsl r6, r6, #8\n\t" "add r6, r6, #252\n\t" "\n1:\n\t" - "ldr r7, [%[a], r6]\n\t" + "ldr r8, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r7, r3\n\t" + "and r8, r8, r3\n\t" "and r5, r5, r3\n\t" - "mov r4, r7\n\t" - "subs r7, r7, r5\n\t" - "sbc r7, r7, r7\n\t" - "add %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "subs r5, r5, r4\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + : "r3", "r4", "r5", "r6", "r8" ); return r; @@ -13774,27 +13769,27 @@ SP_NOINLINE static int32_t sp_256_cmp_8(const sp_digit* a, const sp_digit* b) "mvn r3, r3\n\t" "mov r6, #28\n\t" "\n1:\n\t" - "ldr r7, [%[a], r6]\n\t" + "ldr r8, [%[a], r6]\n\t" "ldr r5, [%[b], r6]\n\t" - "and r7, r7, r3\n\t" + "and r8, r8, r3\n\t" "and r5, r5, r3\n\t" - "mov r4, r7\n\t" - "subs r7, r7, r5\n\t" - "sbc r7, r7, r7\n\t" - "add %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "mov r4, r8\n\t" + "subs r8, r8, r5\n\t" + "sbc r8, r8, r8\n\t" + "add %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "subs r5, r5, r4\n\t" - "sbc r7, r7, r7\n\t" - "sub %[r], %[r], r7\n\t" - "mvn r7, r7\n\t" - "and r3, r3, r7\n\t" + "sbc r8, r8, r8\n\t" + "sub %[r], %[r], r8\n\t" + "mvn r8, r8\n\t" + "and r3, r3, r8\n\t" "sub r6, r6, #4\n\t" "cmp r6, #0\n\t" "bge 1b\n\t" : [r] "+r" (r) : [a] "r" (a), [b] "r" (b) - : "r3", "r4", "r5", "r6", "r7" + : "r3", "r4", "r5", "r6", "r8" ); return r; @@ -13821,23 +13816,23 @@ SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r5, #32\n\t" - "mov r8, r5\n\t" - "mov r7, #0\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" "\n1:\n\t" - "ldr r6, [%[b], r7]\n\t" + "ldr r6, [%[b], r8]\n\t" "and r6, r6, %[m]\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" - "ldr r5, [%[a], r7]\n\t" + "ldr r5, [%[a], r8]\n\t" "sbcs r5, r5, r6\n\t" "sbcs %[c], %[c], %[c]\n\t" - "str r5, [%[r], r7]\n\t" - "add r7, r7, #4\n\t" - "cmp r7, r8\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r7", "r8" + : "memory", "r5", "r6", "r8", "r9" ); return c; @@ -13859,7 +13854,7 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, "mov r2, #0\n\t" "mov r1, #0\n\t" /* i = 0 */ - "mov r8, r2\n\t" + "mov r9, r2\n\t" "\n1:\n\t" "mov r4, #0\n\t" /* mu = a[i] * 1 (mp) = a[i] */ @@ -13907,7 +13902,7 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, "str r5, [%[a], #24]\n\t" /* a[i+7] += -1 * mu */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[a], #32]\n\t" + "ldr r8, [%[a], #32]\n\t" "adds r5, r1, r3\n\t" "mov r1, #0\n\t" "adc r1, r1, r2\n\t" @@ -13915,15 +13910,15 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, "sbcs r5, r5, r2\n\t" "sbc r1, r1, r2\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r1, r1, r2\n\t" "str r4, [%[a], #28]\n\t" "str r5, [%[a], #32]\n\t" /* i += 1 */ - "add r8, r8, #1\n\t" + "add r9, r9, #1\n\t" "add %[a], %[a], #4\n\t" "mov r6, #8\n\t" - "cmp r8, r6\n\t" + "cmp r9, r6\n\t" "blt 1b\n\t" "sub %[a], %[a], #32\n\t" "mov r3, r1\n\t" @@ -13932,30 +13927,30 @@ SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m, "ldr r4, [%[a],#32]\n\t" "ldr r5, [%[a],#36]\n\t" "ldr r6, [%[a],#40]\n\t" - "ldr r7, [%[a],#44]\n\t" + "ldr r8, [%[a],#44]\n\t" "subs r4, r4, r1\n\t" "sbcs r5, r5, r1\n\t" "sbcs r6, r6, r1\n\t" - "sbcs r7, r7, r2\n\t" + "sbcs r8, r8, r2\n\t" "str r4, [%[a],#0]\n\t" "str r5, [%[a],#4]\n\t" "str r6, [%[a],#8]\n\t" - "str r7, [%[a],#12]\n\t" + "str r8, [%[a],#12]\n\t" "ldr r4, [%[a],#48]\n\t" "ldr r5, [%[a],#52]\n\t" "ldr r6, [%[a],#56]\n\t" - "ldr r7, [%[a],#60]\n\t" + "ldr r8, [%[a],#60]\n\t" "sbcs r4, r4, r2\n\t" "sbcs r5, r5, r2\n\t" "sbcs r6, r6, r3\n\t" - "sbc r7, r7, r1\n\t" + "sbc r8, r8, r1\n\t" "str r4, [%[a],#16]\n\t" "str r5, [%[a],#20]\n\t" "str r6, [%[a],#24]\n\t" - "str r7, [%[a],#28]\n\t" + "str r8, [%[a],#28]\n\t" : [a] "+r" (a) : - : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r8", "r9" ); @@ -13975,28 +13970,27 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* sp_digit ca = 0; __asm__ __volatile__ ( - "mov r8, %[mp]\n\t" + "mov r9, %[mp]\n\t" "mov r12, %[m]\n\t" - "mov r9, %[a]\n\t" + "mov r10, %[a]\n\t" "mov r4, #0\n\t" - "add r11, r9, #32\n\t" + "add r11, r10, #32\n\t" "\n1:\n\t" /* mu = a[i] * mp */ - "mov %[mp], r8\n\t" - "ldr %[a], [r9]\n\t" + "mov %[mp], r9\n\t" + "ldr %[a], [r10]\n\t" "mul %[mp], %[mp], %[a]\n\t" "mov %[m], r12\n\t" - "mov r10, r9\n\t" - "add r14, r9, #24\n\t" + "add r14, r10, #24\n\t" "\n2:\n\t" /* a[i+j] += m[j] * mu */ "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -14005,10 +13999,10 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* "ldr %[a], [r10]\n\t" "mov r4, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r4, r4, r7\n\t" + "adc r4, r4, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r5, r5, %[a]\n\t" "adc r4, r4, #0\n\t" @@ -14019,10 +14013,10 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* "ldr %[a], [r10]\n\t" "mov r5, #0\n\t" /* Multiply m[j] and mu - Start */ - "ldr r7, [%[m]], #4\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]], #4\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds %[a], %[a], r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* Multiply m[j] and mu - Done */ "adds r4, r4, %[a]\n\t" "adc r5, r5, #0\n\t" @@ -14031,28 +14025,28 @@ SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* "mov r4, %[ca]\n\t" "mov %[ca], #0\n\t" /* Multiply m[7] and mu - Start */ - "ldr r7, [%[m]]\n\t" - "umull r6, r7, %[mp], r7\n\t" + "ldr r8, [%[m]]\n\t" + "umull r6, r8, %[mp], r8\n\t" "adds r5, r5, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc %[ca], %[ca], #0\n\t" /* Multiply m[7] and mu - Done */ "ldr r6, [r10]\n\t" - "ldr r7, [r10, #4]\n\t" + "ldr r8, [r10, #4]\n\t" "adds r6, r6, r5\n\t" - "adcs r7, r7, r4\n\t" + "adcs r8, r8, r4\n\t" "adc %[ca], %[ca], #0\n\t" "str r6, [r10]\n\t" - "str r7, [r10, #4]\n\t" + "str r8, [r10, #4]\n\t" /* Next word in a */ - "add r9, r9, #4\n\t" - "cmp r9, r11\n\t" + "sub r10, r10, #24\n\t" + "cmp r10, r11\n\t" "blt 1b\n\t" - "mov %[a], r9\n\t" + "mov %[a], r10\n\t" "mov %[m], r12\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14" ); sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca); @@ -14072,475 +14066,475 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( /* A[0] * B[0] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r3, r4, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r3, r4, r6, r8\n\t" "mov r5, #0\n\t" "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * B[1] */ - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" /* A[1] * B[0] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * B[2] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[1] * B[1] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[2] * B[0] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * B[3] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[1] * B[2] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[2] * B[1] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[3] * B[0] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * B[4] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[1] * B[3] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[2] * B[2] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[3] * B[1] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[4] * B[0] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * B[5] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[1] * B[4] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[2] * B[3] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[3] * B[2] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[4] * B[1] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * B[0] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * B[6] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[1] * B[5] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[2] * B[4] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[3] * B[3] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[4] * B[2] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[5] * B[1] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * B[0] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * B[7] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[1] * B[6] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[2] * B[5] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[3] * B[4] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[4] * B[3] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[5] * B[2] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[6] * B[1] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[7] * B[0] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #0]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #0]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * B[7] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[2] * B[6] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[3] * B[5] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[4] * B[4] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * B[3] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[6] * B[2] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[7] * B[1] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * B[7] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[3] * B[6] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[4] * B[5] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[5] * B[4] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * B[3] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[7] * B[2] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * B[7] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[4] * B[6] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[5] * B[5] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[6] * B[4] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[7] * B[3] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #12]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #12]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * B[7] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * B[6] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[6] * B[5] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[7] * B[4] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #16]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #16]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * B[7] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * B[6] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[7] * B[5] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #20]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #20]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * B[7] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" /* A[7] * B[6] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * B[7] */ "ldr r6, [%[a], #28]\n\t" - "ldr r7, [%[b], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[b], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" + "adc r3, r3, r8\n\t" "str r5, [%[r], #56]\n\t" "str r3, [%[r], #60]\n\t" /* Transfer tmp to r */ @@ -14562,7 +14556,7 @@ SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a, "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); } @@ -14598,315 +14592,315 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "str r3, [%[tmp], #0]\n\t" "mov r3, #0\n\t" /* A[0] * A[1] */ - "ldr r7, [%[a], #4]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #4]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adc r5, r5, r7\n\t" + "adc r5, r5, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[tmp], #4]\n\t" "mov r4, #0\n\t" /* A[0] * A[2] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #8]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" + "adc r3, r3, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[1] * A[1] */ "ldr r6, [%[a], #4]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[tmp], #8]\n\t" "mov r5, #0\n\t" /* A[0] * A[3] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #12]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[2] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #8]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r5, r5, r10\n\t" + "ldr r8, [%[a], #8]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" "str r3, [%[tmp], #12]\n\t" "mov r3, #0\n\t" /* A[0] * A[4] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[3] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #12]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[2] */ "ldr r6, [%[a], #8]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adc r3, r3, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" "str r4, [%[tmp], #16]\n\t" "mov r4, #0\n\t" /* A[0] * A[5] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[4] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[3] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #12]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r5, r5, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r10\n\t" + "ldr r8, [%[a], #12]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" "str r5, [%[tmp], #20]\n\t" "mov r5, #0\n\t" /* A[0] * A[6] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[5] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[4] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[3] * A[3] */ "ldr r6, [%[a], #12]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r5, r5, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" "str r3, [%[tmp], #24]\n\t" "mov r3, #0\n\t" /* A[0] * A[7] */ "ldr r6, [%[a], #0]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[1] * A[6] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[2] * A[5] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[3] * A[4] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #16]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adc r3, r3, r10\n\t" + "ldr r8, [%[a], #16]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" "str r4, [%[tmp], #28]\n\t" "mov r4, #0\n\t" /* A[1] * A[7] */ "ldr r6, [%[a], #4]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[2] * A[6] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[3] * A[5] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[4] * A[4] */ "ldr r6, [%[a], #16]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r5, r5, r8\n\t" - "adcs r3, r3, r9\n\t" - "adc r4, r4, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r5, r5, r9\n\t" + "adcs r3, r3, r10\n\t" + "adc r4, r4, r11\n\t" "str r5, [%[r], #32]\n\t" "mov r5, #0\n\t" /* A[2] * A[7] */ "ldr r6, [%[a], #8]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[3] * A[6] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[4] * A[5] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #20]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r3, r3, r8\n\t" - "adcs r4, r4, r9\n\t" - "adc r5, r5, r10\n\t" + "ldr r8, [%[a], #20]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r3, r3, r9\n\t" + "adcs r4, r4, r10\n\t" + "adc r5, r5, r11\n\t" "str r3, [%[r], #36]\n\t" "mov r3, #0\n\t" /* A[3] * A[7] */ "ldr r6, [%[a], #12]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r8, r9, r6, r7\n\t" - "mov r10, #0\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r9, r10, r6, r8\n\t" + "mov r11, #0\n\t" /* A[4] * A[6] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" - "adds r8, r8, r6\n\t" - "adcs r9, r9, r7\n\t" - "adc r10, r10, #0\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" + "adds r9, r9, r6\n\t" + "adcs r10, r10, r8\n\t" + "adc r11, r11, #0\n\t" /* A[5] * A[5] */ "ldr r6, [%[a], #20]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" - "adds r8, r8, r8\n\t" - "adcs r9, r9, r9\n\t" - "adc r10, r10, r10\n\t" - "adds r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adc r3, r3, r10\n\t" + "adds r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "adc r11, r11, r11\n\t" + "adds r4, r4, r9\n\t" + "adcs r5, r5, r10\n\t" + "adc r3, r3, r11\n\t" "str r4, [%[r], #40]\n\t" "mov r4, #0\n\t" /* A[4] * A[7] */ "ldr r6, [%[a], #16]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" /* A[5] * A[6] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[a], #24]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #24]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "adds r5, r5, r6\n\t" - "adcs r3, r3, r7\n\t" + "adcs r3, r3, r8\n\t" "adc r4, r4, #0\n\t" "str r5, [%[r], #44]\n\t" "mov r5, #0\n\t" /* A[5] * A[7] */ "ldr r6, [%[a], #20]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[6] * A[6] */ "ldr r6, [%[a], #24]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" "str r3, [%[r], #48]\n\t" "mov r3, #0\n\t" /* A[6] * A[7] */ "ldr r6, [%[a], #24]\n\t" - "ldr r7, [%[a], #28]\n\t" - "umull r6, r7, r6, r7\n\t" + "ldr r8, [%[a], #28]\n\t" + "umull r6, r8, r6, r8\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "adc r3, r3, #0\n\t" "str r4, [%[r], #52]\n\t" "mov r4, #0\n\t" /* A[7] * A[7] */ "ldr r6, [%[a], #28]\n\t" - "umull r6, r7, r6, r6\n\t" + "umull r6, r8, r6, r6\n\t" "adds r5, r5, r6\n\t" - "adc r3, r3, r7\n\t" + "adc r3, r3, r8\n\t" "str r5, [%[r], #56]\n\t" "str r3, [%[r], #60]\n\t" /* Transfer tmp to r */ @@ -14928,7 +14922,7 @@ SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "str r6, [%[r], #28]\n\t" : : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11" ); } @@ -15103,11 +15097,11 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r6, %[a]\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "add r6, r6, #32\n\t" - "sub r7, r7, #1\n\t" + "sub r8, r8, #1\n\t" "\n1:\n\t" - "adds %[c], %[c], r7\n\t" + "adds %[c], %[c], r8\n\t" "ldr r4, [%[a]]\n\t" "ldr r5, [%[b]]\n\t" "adcs r4, r4, r5\n\t" @@ -15121,7 +15115,7 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -15141,30 +15135,30 @@ SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "ldm %[a]!, {r4, r5}\n\t" - "ldm %[b]!, {r6, r7}\n\t" + "ldm %[b]!, {r6, r8}\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "stm %[r]!, {r4, r5}\n\t" "mov %[c], #0\n\t" "adc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -15188,40 +15182,40 @@ SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const "ldr r4, [%[a],#0]\n\t" "ldr r5, [%[a],#4]\n\t" "ldr r6, [%[b],#0]\n\t" - "ldr r7, [%[b],#4]\n\t" + "ldr r8, [%[b],#4]\n\t" "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "ldr r4, [%[a],#8]\n\t" "ldr r5, [%[a],#12]\n\t" "ldr r6, [%[b],#8]\n\t" - "ldr r7, [%[b],#12]\n\t" + "ldr r8, [%[b],#12]\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" + "adcs r5, r5, r8\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" "ldr r4, [%[a],#16]\n\t" "ldr r5, [%[a],#20]\n\t" "ldr r6, [%[b],#16]\n\t" - "ldr r7, [%[b],#20]\n\t" + "ldr r8, [%[b],#20]\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "mov r8, r4\n\t" - "mov r9, r5\n\t" + "adcs r5, r5, r8\n\t" + "mov r9, r4\n\t" + "mov r10, r5\n\t" "ldr r4, [%[a],#24]\n\t" "ldr r5, [%[a],#28]\n\t" "ldr r6, [%[b],#24]\n\t" - "ldr r7, [%[b],#28]\n\t" + "ldr r8, [%[b],#28]\n\t" "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "mov r10, r4\n\t" - "mov r11, r5\n\t" + "adcs r5, r5, r8\n\t" + "mov r11, r4\n\t" + "mov r12, r5\n\t" "adc r3, r3, r3\n\t" "mov r6, r3\n\t" "sub r3, r3, #1\n\t" "mvn r3, r3\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "ldr r4, [%[r],#0]\n\t" "ldr r5, [%[r],#4]\n\t" "subs r4, r4, r3\n\t" @@ -15231,24 +15225,24 @@ SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const "ldr r4, [%[r],#8]\n\t" "ldr r5, [%[r],#12]\n\t" "sbcs r4, r4, r3\n\t" - "sbcs r5, r5, r7\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" - "mov r4, r8\n\t" - "mov r5, r9\n\t" - "sbcs r4, r4, r7\n\t" - "sbcs r5, r5, r7\n\t" + "mov r4, r9\n\t" + "mov r5, r10\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r],#16]\n\t" "str r5, [%[r],#20]\n\t" - "mov r4, r10\n\t" - "mov r5, r11\n\t" + "mov r4, r11\n\t" + "mov r5, r12\n\t" "sbcs r4, r4, r6\n\t" "sbc r5, r5, r3\n\t" "str r4, [%[r],#24]\n\t" "str r5, [%[r],#28]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -15266,29 +15260,29 @@ SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const "ldr r4, [%[a],#0]\n\t" "ldr r5, [%[a],#4]\n\t" "ldr r6, [%[a],#8]\n\t" - "ldr r7, [%[a],#12]\n\t" + "ldr r8, [%[a],#12]\n\t" "adds r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" + "adcs r8, r8, r8\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "str r6, [%[r],#8]\n\t" - "str r7, [%[r],#12]\n\t" + "str r8, [%[r],#12]\n\t" "ldr r4, [%[a],#16]\n\t" "ldr r5, [%[a],#20]\n\t" "ldr r6, [%[a],#24]\n\t" - "ldr r7, [%[a],#28]\n\t" + "ldr r8, [%[a],#28]\n\t" "adcs r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" "adcs r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "mov r8, r4\n\t" - "mov r9, r5\n\t" - "mov r10, r6\n\t" - "mov r11, r7\n\t" + "adcs r8, r8, r8\n\t" + "mov r9, r4\n\t" + "mov r10, r5\n\t" + "mov r11, r6\n\t" + "mov r12, r8\n\t" "mov r3, #0\n\t" - "mov r7, #0\n\t" + "mov r8, #0\n\t" "adc r3, r3, r3\n\t" "mov r2, r3\n\t" "sub r3, r3, #1\n\t" @@ -15303,23 +15297,23 @@ SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const "str r5, [%[r],#4]\n\t" "str r6, [%[r],#8]\n\t" "ldr r4, [%[r],#12]\n\t" - "mov r5, r8\n\t" - "mov r6, r9\n\t" - "sbcs r4, r4, r7\n\t" - "sbcs r5, r5, r7\n\t" - "sbcs r6, r6, r7\n\t" + "mov r5, r9\n\t" + "mov r6, r10\n\t" + "sbcs r4, r4, r8\n\t" + "sbcs r5, r5, r8\n\t" + "sbcs r6, r6, r8\n\t" "str r4, [%[r],#12]\n\t" "str r5, [%[r],#16]\n\t" "str r6, [%[r],#20]\n\t" - "mov r4, r10\n\t" - "mov r5, r11\n\t" + "mov r4, r11\n\t" + "mov r5, r12\n\t" "sbcs r4, r4, r2\n\t" "sbc r5, r5, r3\n\t" "str r4, [%[r],#24]\n\t" "str r5, [%[r],#28]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r3", "r2", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r2", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -15334,113 +15328,77 @@ SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const (void)m; __asm__ __volatile__ ( - "ldr r6, [%[a],#0]\n\t" - "ldr r7, [%[a],#4]\n\t" + "ldr r2, [%[a],#0]\n\t" + "ldr r3, [%[a],#4]\n\t" "ldr r4, [%[a],#8]\n\t" "ldr r5, [%[a],#12]\n\t" - "adds r6, r6, r6\n\t" - "adcs r7, r7, r7\n\t" - "adcs r4, r4, r4\n\t" - "adcs r5, r5, r5\n\t" - "mov r8, r4\n\t" - "mov r9, r5\n\t" - "ldr r2, [%[a],#16]\n\t" - "ldr r3, [%[a],#20]\n\t" - "ldr r4, [%[a],#24]\n\t" - "ldr r5, [%[a],#28]\n\t" - "adcs r2, r2, r2\n\t" + "ldr r6, [%[a],#16]\n\t" + "ldr r8, [%[a],#20]\n\t" + "ldr r9, [%[a],#24]\n\t" + "ldr r10, [%[a],#28]\n\t" + "adds r2, r2, r2\n\t" "adcs r3, r3, r3\n\t" "adcs r4, r4, r4\n\t" "adcs r5, r5, r5\n\t" - "mov r10, r2\n\t" - "mov r11, r3\n\t" - "mov r12, r4\n\t" - "mov r14, r5\n\t" - "mov r3, #0\n\t" - "mov r5, #0\n\t" - "adc r3, r3, r3\n\t" - "mov r4, r3\n\t" - "sub r3, r3, #1\n\t" - "mvn r3, r3\n\t" - "subs r6, r6, r3\n\t" - "sbcs r7, r7, r3\n\t" - "mov r2, r8\n\t" - "sbcs r2, r2, r3\n\t" - "mov r8, r2\n\t" - "mov r2, r9\n\t" - "sbcs r2, r2, r5\n\t" - "mov r9, r2\n\t" - "mov r2, r10\n\t" - "sbcs r2, r2, r5\n\t" - "mov r10, r2\n\t" - "mov r2, r11\n\t" - "sbcs r2, r2, r5\n\t" - "mov r11, r2\n\t" - "mov r2, r12\n\t" - "sbcs r2, r2, r4\n\t" - "mov r12, r2\n\t" - "mov r2, r14\n\t" - "sbc r2, r2, r3\n\t" - "mov r14, r2\n\t" - "ldr r2, [%[a],#0]\n\t" - "ldr r3, [%[a],#4]\n\t" - "adds r6, r6, r2\n\t" - "adcs r7, r7, r3\n\t" - "ldr r2, [%[a],#8]\n\t" - "ldr r3, [%[a],#12]\n\t" - "mov r4, r8\n\t" - "mov r5, r9\n\t" - "adcs r2, r2, r4\n\t" - "adcs r3, r3, r5\n\t" - "mov r8, r2\n\t" - "mov r9, r3\n\t" - "ldr r2, [%[a],#16]\n\t" - "ldr r3, [%[a],#20]\n\t" - "mov r4, r10\n\t" - "mov r5, r11\n\t" - "adcs r2, r2, r4\n\t" - "adcs r3, r3, r5\n\t" - "mov r10, r2\n\t" - "mov r11, r3\n\t" - "ldr r2, [%[a],#24]\n\t" - "ldr r3, [%[a],#28]\n\t" - "mov r4, r12\n\t" - "mov r5, r14\n\t" - "adcs r2, r2, r4\n\t" - "adcs r3, r3, r5\n\t" - "mov r12, r2\n\t" - "mov r14, r3\n\t" - "mov r3, #0\n\t" - "mov r5, #0\n\t" - "adc r3, r3, r3\n\t" - "mov r4, r3\n\t" - "sub r3, r3, #1\n\t" - "mvn r3, r3\n\t" - "subs r6, r6, r3\n\t" - "str r6, [%[r],#0]\n\t" - "sbcs r7, r7, r3\n\t" - "str r7, [%[r],#4]\n\t" - "mov r2, r8\n\t" - "sbcs r2, r2, r3\n\t" - "str r2, [%[r],#8]\n\t" - "mov r2, r9\n\t" - "sbcs r2, r2, r5\n\t" - "str r2, [%[r],#12]\n\t" - "mov r2, r10\n\t" - "sbcs r2, r2, r5\n\t" - "str r2, [%[r],#16]\n\t" - "mov r2, r11\n\t" - "sbcs r2, r2, r5\n\t" - "str r2, [%[r],#20]\n\t" - "mov r2, r12\n\t" - "sbcs r2, r2, r4\n\t" - "str r2, [%[r],#24]\n\t" - "mov r2, r14\n\t" - "sbc r2, r2, r3\n\t" - "str r2, [%[r],#28]\n\t" + "adcs r6, r6, r6\n\t" + "adcs r8, r8, r8\n\t" + "adcs r9, r9, r9\n\t" + "adcs r10, r10, r10\n\t" + "mov r11, #0\n\t" + "mov r14, #0\n\t" + "adc r11, r11, r11\n\t" + "mov r12, r11\n\t" + "sub r11, r11, #1\n\t" + "mvn r11, r11\n\t" + "subs r2, r2, r11\n\t" + "sbcs r3, r3, r11\n\t" + "sbcs r4, r4, r11\n\t" + "sbcs r5, r5, r14\n\t" + "sbcs r6, r6, r14\n\t" + "sbcs r8, r8, r14\n\t" + "sbcs r9, r9, r12\n\t" + "sbc r10, r10, r11\n\t" + "ldr r12, [%[a],#0]\n\t" + "ldr r14, [%[a],#4]\n\t" + "adds r2, r2, r12\n\t" + "adcs r3, r3, r14\n\t" + "ldr r12, [%[a],#8]\n\t" + "ldr r14, [%[a],#12]\n\t" + "adcs r4, r4, r12\n\t" + "adcs r5, r5, r14\n\t" + "ldr r12, [%[a],#16]\n\t" + "ldr r14, [%[a],#20]\n\t" + "adcs r6, r6, r12\n\t" + "adcs r8, r8, r14\n\t" + "ldr r12, [%[a],#24]\n\t" + "ldr r14, [%[a],#28]\n\t" + "adcs r9, r9, r12\n\t" + "adcs r10, r10, r14\n\t" + "mov r11, #0\n\t" + "mov r14, #0\n\t" + "adc r11, r11, r11\n\t" + "mov r12, r11\n\t" + "sub r11, r11, #1\n\t" + "mvn r11, r11\n\t" + "subs r2, r2, r11\n\t" + "str r2, [%[r],#0]\n\t" + "sbcs r3, r3, r11\n\t" + "str r3, [%[r],#4]\n\t" + "sbcs r4, r4, r11\n\t" + "str r4, [%[r],#8]\n\t" + "sbcs r5, r5, r14\n\t" + "str r5, [%[r],#12]\n\t" + "sbcs r6, r6, r14\n\t" + "str r6, [%[r],#16]\n\t" + "sbcs r8, r8, r14\n\t" + "str r8, [%[r],#20]\n\t" + "sbcs r9, r9, r12\n\t" + "str r9, [%[r],#24]\n\t" + "sbc r10, r10, r11\n\t" + "str r10, [%[r],#28]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" + : "memory", "r11", "r12", "r14", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10" ); } @@ -15460,37 +15418,37 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const "ldr r4, [%[a],#0]\n\t" "ldr r5, [%[a],#4]\n\t" "ldr r6, [%[b],#0]\n\t" - "ldr r7, [%[b],#4]\n\t" + "ldr r8, [%[b],#4]\n\t" "subs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r],#0]\n\t" "str r5, [%[r],#4]\n\t" "ldr r4, [%[a],#8]\n\t" "ldr r5, [%[a],#12]\n\t" "ldr r6, [%[b],#8]\n\t" - "ldr r7, [%[b],#12]\n\t" + "ldr r8, [%[b],#12]\n\t" "sbcs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" "ldr r4, [%[a],#16]\n\t" "ldr r5, [%[a],#20]\n\t" "ldr r6, [%[b],#16]\n\t" - "ldr r7, [%[b],#20]\n\t" + "ldr r8, [%[b],#20]\n\t" "sbcs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" - "mov r8, r4\n\t" - "mov r9, r5\n\t" + "sbcs r5, r5, r8\n\t" + "mov r9, r4\n\t" + "mov r10, r5\n\t" "ldr r4, [%[a],#24]\n\t" "ldr r5, [%[a],#28]\n\t" "ldr r6, [%[b],#24]\n\t" - "ldr r7, [%[b],#28]\n\t" + "ldr r8, [%[b],#28]\n\t" "sbcs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" - "mov r10, r4\n\t" - "mov r11, r5\n\t" + "sbcs r5, r5, r8\n\t" + "mov r11, r4\n\t" + "mov r12, r5\n\t" "sbc r3, r3, r3\n\t" - "lsr r7, r3, #31\n\t" + "lsr r8, r3, #31\n\t" "mov r6, #0\n\t" "ldr r4, [%[r],#0]\n\t" "ldr r5, [%[r],#4]\n\t" @@ -15504,21 +15462,21 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const "adcs r5, r5, r6\n\t" "str r4, [%[r],#8]\n\t" "str r5, [%[r],#12]\n\t" - "mov r4, r8\n\t" - "mov r5, r9\n\t" + "mov r4, r9\n\t" + "mov r5, r10\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r6\n\t" "str r4, [%[r],#16]\n\t" "str r5, [%[r],#20]\n\t" - "mov r4, r10\n\t" - "mov r5, r11\n\t" - "adcs r4, r4, r7\n\t" + "mov r4, r11\n\t" + "mov r5, r12\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, r3\n\t" "str r4, [%[r],#24]\n\t" "str r5, [%[r],#28]\n\t" : : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12" ); } @@ -15531,12 +15489,12 @@ SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "ldr r7, [%[a], #0]\n\t" - "lsl r7, r7, #31\n\t" - "lsr r7, r7, #31\n\t" + "ldr r8, [%[a], #0]\n\t" + "lsl r8, r8, #31\n\t" + "lsr r8, r8, #31\n\t" "mov r5, #0\n\t" - "sub r5, r5, r7\n\t" - "mov r7, #0\n\t" + "sub r5, r5, r8\n\t" + "mov r8, #0\n\t" "lsl r6, r5, #31\n\t" "lsr r6, r6, #31\n\t" "ldr r3, [%[a], #0]\n\t" @@ -15548,28 +15506,28 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_d "ldr r3, [%[a], #8]\n\t" "ldr r4, [%[a], #12]\n\t" "adcs r3, r3, r5\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "str r3, [%[r], #8]\n\t" "str r4, [%[r], #12]\n\t" "ldr r3, [%[a], #16]\n\t" "ldr r4, [%[a], #20]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r7\n\t" + "adcs r3, r3, r8\n\t" + "adcs r4, r4, r8\n\t" "str r3, [%[r], #16]\n\t" "str r4, [%[r], #20]\n\t" "ldr r3, [%[a], #24]\n\t" "ldr r4, [%[a], #28]\n\t" "adcs r3, r3, r6\n\t" "adcs r4, r4, r5\n\t" - "adc r7, r7, r7\n\t" - "lsl r7, r7, #31\n\t" + "adc r8, r8, r8\n\t" + "lsl r8, r8, #31\n\t" "lsr r5, r3, #1\n\t" "lsl r3, r3, #31\n\t" "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" "orr r5, r5, r4\n\t" - "orr r6, r6, r7\n\t" - "mov r7, r3\n\t" + "orr r6, r6, r8\n\t" + "mov r8, r3\n\t" "str r5, [%[r], #24]\n\t" "str r6, [%[r], #28]\n\t" "ldr r3, [%[a], #16]\n\t" @@ -15579,8 +15537,8 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_d "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" "orr r5, r5, r4\n\t" - "orr r6, r6, r7\n\t" - "mov r7, r3\n\t" + "orr r6, r6, r8\n\t" + "mov r8, r3\n\t" "str r5, [%[r], #16]\n\t" "str r6, [%[r], #20]\n\t" "ldr r3, [%[a], #8]\n\t" @@ -15590,8 +15548,8 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_d "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" "orr r5, r5, r4\n\t" - "orr r6, r6, r7\n\t" - "mov r7, r3\n\t" + "orr r6, r6, r8\n\t" + "mov r8, r3\n\t" "str r5, [%[r], #8]\n\t" "str r6, [%[r], #12]\n\t" "ldr r3, [%[r], #0]\n\t" @@ -15600,12 +15558,12 @@ SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_d "lsr r6, r4, #1\n\t" "lsl r4, r4, #31\n\t" "orr r5, r5, r4\n\t" - "orr r6, r6, r7\n\t" + "orr r6, r6, r8\n\t" "str r5, [%[r], #0]\n\t" "str r6, [%[r], #4]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); } @@ -15739,39 +15697,39 @@ SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[a], #4]\n\t" "ldr r6, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" + "ldr r8, [%[b], #4]\n\t" "subs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r], #0]\n\t" "str r5, [%[r], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "ldr r6, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" + "ldr r8, [%[b], #12]\n\t" "sbcs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r], #8]\n\t" "str r5, [%[r], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "ldr r6, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" + "ldr r8, [%[b], #20]\n\t" "sbcs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r], #16]\n\t" "str r5, [%[r], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "ldr r6, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" + "ldr r8, [%[b], #28]\n\t" "sbcs r4, r4, r6\n\t" - "sbcs r5, r5, r7\n\t" + "sbcs r5, r5, r8\n\t" "str r4, [%[r], #24]\n\t" "str r5, [%[r], #28]\n\t" "sbc %[c], %[c], %[c]\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r4", "r5", "r6", "r7" + : "memory", "r4", "r5", "r6", "r8" ); return c; @@ -18607,8 +18565,8 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, { sp_digit c = 0; __asm__ __volatile__ ( - "mov r7, %[a]\n\t" - "add r7, r7, #32\n\t" + "mov r8, %[a]\n\t" + "add r8, r8, #32\n\t" "\n1:\n\t" "mov r5, #0\n\t" "subs r5, r5, %[c]\n\t" @@ -18623,11 +18581,11 @@ SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a, "sbc %[c], %[c], %[c]\n\t" "add %[a], %[a], #8\n\t" "add %[b], %[b], #8\n\t" - "cmp %[a], r7\n\t" + "cmp %[a], r8\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7" + : "memory", "r3", "r4", "r5", "r6", "r8" ); return c; @@ -18686,7 +18644,7 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, sp_digit b) { __asm__ __volatile__ ( - "add r8, %[a], #32\n\t" + "add r9, %[a], #32\n\t" /* A[0] * B */ "ldr r6, [%[a]], #4\n\t" "umull r5, r3, r6, %[b]\n\t" @@ -18697,20 +18655,20 @@ SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, "mov r5, #0\n\t" /* A[] * B */ "ldr r6, [%[a]], #4\n\t" - "umull r6, r7, r6, %[b]\n\t" + "umull r6, r8, r6, %[b]\n\t" "adds r3, r3, r6\n\t" - "adcs r4, r4, r7\n\t" + "adcs r4, r4, r8\n\t" "adc r5, r5, #0\n\t" /* A[] * B - Done */ "str r3, [%[r]], #4\n\t" "mov r3, r4\n\t" "mov r4, r5\n\t" - "cmp %[a], r8\n\t" + "cmp %[a], r9\n\t" "blt 1b\n\t" "str r3, [%[r]]\n\t" : [r] "+r" (r), [a] "+r" (a) : [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8" + : "memory", "r3", "r4", "r5", "r6", "r8", "r9" ); } @@ -18732,36 +18690,36 @@ SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0, "lsr r6, %[div], #16\n\t" "add r6, r6, #1\n\t" "udiv r4, %[d1], r6\n\t" - "lsl r7, r4, #16\n\t" - "umull r4, r5, %[div], r7\n\t" + "lsl r8, r4, #16\n\t" + "umull r4, r5, %[div], r8\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r5, %[d1], r6\n\t" "lsl r4, r5, #16\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "lsl r4, %[d1], #16\n\t" "orr r4, r4, %[d0], lsr #16\n\t" "udiv r4, r4, r6\n\t" - "add r7, r7, r4\n\t" + "add r8, r8, r4\n\t" "umull r4, r5, %[div], r4\n\t" "subs %[d0], %[d0], r4\n\t" "sbc %[d1], %[d1], r5\n\t" "udiv r4, %[d0], %[div]\n\t" - "add r7, r7, r4\n\t" - "mov %[r], r7\n\t" + "add r8, r8, r4\n\t" + "mov %[r], r8\n\t" : [r] "+r" (r) : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div) - : "r4", "r5", "r6", "r7" + : "r4", "r5", "r6", "r8" ); return r; }