diff --git a/configure.ac b/configure.ac index 6caad120a..bd74fea44 100644 --- a/configure.ac +++ b/configure.ac @@ -6254,7 +6254,7 @@ do ;; *) - AC_MSG_ERROR([Invalid choice of Single Precision length in bits [256, 2048, 3072]: $ENABLED_SP.]) + AC_MSG_ERROR([Invalid choice of Single Precision length in bits [256, 384, 521, 1024, 2048, 3072, 4096]: $ENABLED_SP.]) break;; esac done diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index a745e76f5..5f9a5957d 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -685,30 +685,24 @@ static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -724,50 +718,38 @@ static sp_digit sp_2048_sub_in_place_16(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -786,50 +768,38 @@ static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -910,90 +880,66 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldrd r2, r3, [%[a], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" - "ldrd r8, r9, [%[b], #72]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #64]\n\t" - "strd r4, r5, [%[a], #72]\n\t" - "ldrd r2, r3, [%[a], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" - "ldrd r8, r9, [%[b], #88]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #80]\n\t" - "strd r4, r5, [%[a], #88]\n\t" - "ldrd r2, r3, [%[a], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" - "ldrd r8, r9, [%[b], #104]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #96]\n\t" - "strd r4, r5, [%[a], #104]\n\t" - "ldrd r2, r3, [%[a], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" - "ldrd r8, r9, [%[b], #120]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #112]\n\t" - "strd r4, r5, [%[a], #120]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -1012,90 +958,66 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -1180,170 +1102,122 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldrd r2, r3, [%[a], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" - "ldrd r8, r9, [%[b], #72]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #64]\n\t" - "strd r4, r5, [%[a], #72]\n\t" - "ldrd r2, r3, [%[a], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" - "ldrd r8, r9, [%[b], #88]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #80]\n\t" - "strd r4, r5, [%[a], #88]\n\t" - "ldrd r2, r3, [%[a], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" - "ldrd r8, r9, [%[b], #104]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #96]\n\t" - "strd r4, r5, [%[a], #104]\n\t" - "ldrd r2, r3, [%[a], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" - "ldrd r8, r9, [%[b], #120]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #112]\n\t" - "strd r4, r5, [%[a], #120]\n\t" - "ldrd r2, r3, [%[a], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" - "ldrd r8, r9, [%[b], #136]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #128]\n\t" - "strd r4, r5, [%[a], #136]\n\t" - "ldrd r2, r3, [%[a], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" - "ldrd r8, r9, [%[b], #152]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #144]\n\t" - "strd r4, r5, [%[a], #152]\n\t" - "ldrd r2, r3, [%[a], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" - "ldrd r8, r9, [%[b], #168]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #160]\n\t" - "strd r4, r5, [%[a], #168]\n\t" - "ldrd r2, r3, [%[a], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" - "ldrd r8, r9, [%[b], #184]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #176]\n\t" - "strd r4, r5, [%[a], #184]\n\t" - "ldrd r2, r3, [%[a], #192]\n\t" - "ldrd r4, r5, [%[a], #200]\n\t" - "ldrd r6, r7, [%[b], #192]\n\t" - "ldrd r8, r9, [%[b], #200]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #192]\n\t" - "strd r4, r5, [%[a], #200]\n\t" - "ldrd r2, r3, [%[a], #208]\n\t" - "ldrd r4, r5, [%[a], #216]\n\t" - "ldrd r6, r7, [%[b], #208]\n\t" - "ldrd r8, r9, [%[b], #216]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #208]\n\t" - "strd r4, r5, [%[a], #216]\n\t" - "ldrd r2, r3, [%[a], #224]\n\t" - "ldrd r4, r5, [%[a], #232]\n\t" - "ldrd r6, r7, [%[b], #224]\n\t" - "ldrd r8, r9, [%[b], #232]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #224]\n\t" - "strd r4, r5, [%[a], #232]\n\t" - "ldrd r2, r3, [%[a], #240]\n\t" - "ldrd r4, r5, [%[a], #248]\n\t" - "ldrd r6, r7, [%[b], #240]\n\t" - "ldrd r8, r9, [%[b], #248]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #240]\n\t" - "strd r4, r5, [%[a], #248]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -1362,170 +1236,122 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" - "ldrd r3, r4, [%[a], #192]\n\t" - "ldrd r5, r6, [%[a], #200]\n\t" - "ldrd r7, r8, [%[b], #192]\n\t" - "ldrd r9, r10, [%[b], #200]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #192]\n\t" - "strd r5, r6, [%[r], #200]\n\t" - "ldrd r3, r4, [%[a], #208]\n\t" - "ldrd r5, r6, [%[a], #216]\n\t" - "ldrd r7, r8, [%[b], #208]\n\t" - "ldrd r9, r10, [%[b], #216]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #208]\n\t" - "strd r5, r6, [%[r], #216]\n\t" - "ldrd r3, r4, [%[a], #224]\n\t" - "ldrd r5, r6, [%[a], #232]\n\t" - "ldrd r7, r8, [%[b], #224]\n\t" - "ldrd r9, r10, [%[b], #232]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #224]\n\t" - "strd r5, r6, [%[r], #232]\n\t" - "ldrd r3, r4, [%[a], #240]\n\t" - "ldrd r5, r6, [%[a], #248]\n\t" - "ldrd r7, r8, [%[b], #240]\n\t" - "ldrd r9, r10, [%[b], #248]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #240]\n\t" - "strd r5, r6, [%[r], #248]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -1951,30 +1777,24 @@ static sp_digit sp_2048_sub_8(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -2028,50 +1848,38 @@ static sp_digit sp_2048_sub_16(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -2125,90 +1933,66 @@ static sp_digit sp_2048_sub_32(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -2267,23 +2051,20 @@ static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a, "add r14, %[a], #256\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -2305,22 +2086,19 @@ static sp_digit sp_2048_sub_in_place_64(sp_digit* a, const sp_digit* b) "add r12, %[a], #256\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -2494,23 +2272,20 @@ static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a, "add r14, %[a], #128\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -2532,22 +2307,19 @@ static sp_digit sp_2048_sub_in_place_32(sp_digit* a, const sp_digit* b) "add r12, %[a], #128\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -2750,453 +2522,453 @@ static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" + "str r5, [%[r]], #4\n\t" "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" + "str r3, [%[r]], #4\n\t" "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" + "str r4, [%[r]], #4\n\t" "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" + "str r5, [%[r]], #4\n\t" "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" + "str r3, [%[r]], #4\n\t" "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" + "str r4, [%[r]], #4\n\t" "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" + "str r5, [%[r]], #4\n\t" "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" + "str r3, [%[r]], #4\n\t" "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" + "str r4, [%[r]], #4\n\t" "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" + "str r5, [%[r]], #4\n\t" "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" + "str r3, [%[r]], #4\n\t" "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" + "str r4, [%[r]], #4\n\t" "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" + "str r5, [%[r]], #4\n\t" "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" + "str r3, [%[r]], #4\n\t" "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" + "str r4, [%[r]], #4\n\t" "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" + "str r5, [%[r]], #4\n\t" "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" + "str r3, [%[r]], #4\n\t" "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" + "str r4, [%[r]], #4\n\t" "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" + "str r5, [%[r]], #4\n\t" "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" + "str r3, [%[r]], #4\n\t" "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" + "str r4, [%[r]], #4\n\t" "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" + "str r5, [%[r]], #4\n\t" "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" + "str r3, [%[r]], #4\n\t" "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" + "str r4, [%[r]], #4\n\t" "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" + "str r5, [%[r]], #4\n\t" "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" + "str r3, [%[r]], #4\n\t" "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" + "str r4, [%[r]], #4\n\t" "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" + "str r5, [%[r]], #4\n\t" "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" + "str r3, [%[r]], #4\n\t" "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" + "str r4, [%[r]], #4\n\t" "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" + "str r5, [%[r]], #4\n\t" "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" + "str r3, [%[r]], #4\n\t" "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" + "str r4, [%[r]], #4\n\t" "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" + "str r5, [%[r]], #4\n\t" "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" + "str r3, [%[r]], #4\n\t" "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" + "str r4, [%[r]], #4\n\t" "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #188]\n\t" + "str r5, [%[r]], #4\n\t" "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #192]\n\t" + "str r3, [%[r]], #4\n\t" "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #196]\n\t" + "str r4, [%[r]], #4\n\t" "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #200]\n\t" + "str r5, [%[r]], #4\n\t" "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #204]\n\t" + "str r3, [%[r]], #4\n\t" "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #208]\n\t" + "str r4, [%[r]], #4\n\t" "# A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #212]\n\t" + "str r5, [%[r]], #4\n\t" "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #216]\n\t" + "str r3, [%[r]], #4\n\t" "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #220]\n\t" + "str r4, [%[r]], #4\n\t" "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #224]\n\t" + "str r5, [%[r]], #4\n\t" "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #228]\n\t" + "str r3, [%[r]], #4\n\t" "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #232]\n\t" + "str r4, [%[r]], #4\n\t" "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #236]\n\t" + "str r5, [%[r]], #4\n\t" "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #240]\n\t" + "str r3, [%[r]], #4\n\t" "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #244]\n\t" + "str r4, [%[r]], #4\n\t" "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #248]\n\t" + "str r5, [%[r]], #4\n\t" "# A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adc r4, r4, r7\n\t" - "str r3, [%[r], #252]\n\t" - "str r4, [%[r], #256]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r3, [%[r]], #4\n\t" + "str r4, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -3253,121 +3025,121 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #72]\n\t" - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #88]\n\t" - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #104]\n\t" - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -3768,229 +3540,229 @@ static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" + "str r5, [%[r]], #4\n\t" "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" + "str r3, [%[r]], #4\n\t" "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" + "str r4, [%[r]], #4\n\t" "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" + "str r5, [%[r]], #4\n\t" "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" + "str r3, [%[r]], #4\n\t" "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" + "str r4, [%[r]], #4\n\t" "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" + "str r5, [%[r]], #4\n\t" "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" + "str r3, [%[r]], #4\n\t" "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" + "str r4, [%[r]], #4\n\t" "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" + "str r5, [%[r]], #4\n\t" "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" + "str r3, [%[r]], #4\n\t" "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" + "str r4, [%[r]], #4\n\t" "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" + "str r5, [%[r]], #4\n\t" "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" + "str r3, [%[r]], #4\n\t" "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" + "str r4, [%[r]], #4\n\t" "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" + "str r5, [%[r]], #4\n\t" "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" + "str r3, [%[r]], #4\n\t" "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" + "str r4, [%[r]], #4\n\t" "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" + "str r5, [%[r]], #4\n\t" "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" + "str r3, [%[r]], #4\n\t" "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" - "str r4, [%[r], #124]\n\t" - "str r5, [%[r], #128]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -4889,233 +4661,233 @@ static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #72]\n\t" - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #88]\n\t" - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #104]\n\t" - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #120]\n\t" - "ldrd r4, r5, [%[a], #128]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #136]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #136]\n\t" - "ldrd r4, r5, [%[a], #144]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #152]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #152]\n\t" - "ldrd r4, r5, [%[a], #160]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #168]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #168]\n\t" - "ldrd r4, r5, [%[a], #176]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #184]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #184]\n\t" - "ldrd r4, r5, [%[a], #192]\n\t" - "ldrd r6, r7, [%[b], #192]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #192]\n\t" - "ldrd r4, r5, [%[a], #200]\n\t" - "ldrd r6, r7, [%[b], #200]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #200]\n\t" - "ldrd r4, r5, [%[a], #208]\n\t" - "ldrd r6, r7, [%[b], #208]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #208]\n\t" - "ldrd r4, r5, [%[a], #216]\n\t" - "ldrd r6, r7, [%[b], #216]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #216]\n\t" - "ldrd r4, r5, [%[a], #224]\n\t" - "ldrd r6, r7, [%[b], #224]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #224]\n\t" - "ldrd r4, r5, [%[a], #232]\n\t" - "ldrd r6, r7, [%[b], #232]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #232]\n\t" - "ldrd r4, r5, [%[a], #240]\n\t" - "ldrd r6, r7, [%[b], #240]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #240]\n\t" - "ldrd r4, r5, [%[a], #248]\n\t" - "ldrd r6, r7, [%[b], #248]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #248]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -5779,22 +5551,19 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, "add r14, %[a], #256\n\t" "\n1:\n\t" "rsbs %[c], %[c], #0\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "sbc %[c], r3, r3\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r4, r4\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -5813,170 +5582,122 @@ static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" - "ldrd r3, r4, [%[a], #192]\n\t" - "ldrd r5, r6, [%[a], #200]\n\t" - "ldrd r7, r8, [%[b], #192]\n\t" - "ldrd r9, r10, [%[b], #200]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #192]\n\t" - "strd r5, r6, [%[r], #200]\n\t" - "ldrd r3, r4, [%[a], #208]\n\t" - "ldrd r5, r6, [%[a], #216]\n\t" - "ldrd r7, r8, [%[b], #208]\n\t" - "ldrd r9, r10, [%[b], #216]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #208]\n\t" - "strd r5, r6, [%[r], #216]\n\t" - "ldrd r3, r4, [%[a], #224]\n\t" - "ldrd r5, r6, [%[a], #232]\n\t" - "ldrd r7, r8, [%[b], #224]\n\t" - "ldrd r9, r10, [%[b], #232]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #224]\n\t" - "strd r5, r6, [%[r], #232]\n\t" - "ldrd r3, r4, [%[a], #240]\n\t" - "ldrd r5, r6, [%[a], #248]\n\t" - "ldrd r7, r8, [%[b], #240]\n\t" - "ldrd r9, r10, [%[b], #248]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #240]\n\t" - "strd r5, r6, [%[r], #248]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -8287,7 +8008,8 @@ static void sp_2048_lshift_64(sp_digit* r, const sp_digit* a, byte n) "lsl r3, r3, %[n]\n\t" "lsr r5, r5, r6\n\t" "orr r4, r4, r5\n\t" - "strd r3, r4, [%[r]]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n) : "memory", "r2", "r3", "r4", "r5", "r6" @@ -9714,40 +9436,31 @@ static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -9763,70 +9476,52 @@ static sp_digit sp_3072_sub_in_place_24(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldrd r2, r3, [%[a], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" - "ldrd r8, r9, [%[b], #72]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #64]\n\t" - "strd r4, r5, [%[a], #72]\n\t" - "ldrd r2, r3, [%[a], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" - "ldrd r8, r9, [%[b], #88]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #80]\n\t" - "strd r4, r5, [%[a], #88]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -9845,70 +9540,52 @@ static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -9993,130 +9670,94 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldrd r2, r3, [%[a], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" - "ldrd r8, r9, [%[b], #72]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #64]\n\t" - "strd r4, r5, [%[a], #72]\n\t" - "ldrd r2, r3, [%[a], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" - "ldrd r8, r9, [%[b], #88]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #80]\n\t" - "strd r4, r5, [%[a], #88]\n\t" - "ldrd r2, r3, [%[a], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" - "ldrd r8, r9, [%[b], #104]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #96]\n\t" - "strd r4, r5, [%[a], #104]\n\t" - "ldrd r2, r3, [%[a], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" - "ldrd r8, r9, [%[b], #120]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #112]\n\t" - "strd r4, r5, [%[a], #120]\n\t" - "ldrd r2, r3, [%[a], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" - "ldrd r8, r9, [%[b], #136]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #128]\n\t" - "strd r4, r5, [%[a], #136]\n\t" - "ldrd r2, r3, [%[a], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" - "ldrd r8, r9, [%[b], #152]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #144]\n\t" - "strd r4, r5, [%[a], #152]\n\t" - "ldrd r2, r3, [%[a], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" - "ldrd r8, r9, [%[b], #168]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #160]\n\t" - "strd r4, r5, [%[a], #168]\n\t" - "ldrd r2, r3, [%[a], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" - "ldrd r8, r9, [%[b], #184]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #176]\n\t" - "strd r4, r5, [%[a], #184]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -10135,130 +9776,94 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -10343,250 +9948,178 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldrd r2, r3, [%[a], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" - "ldrd r8, r9, [%[b], #72]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #64]\n\t" - "strd r4, r5, [%[a], #72]\n\t" - "ldrd r2, r3, [%[a], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" - "ldrd r8, r9, [%[b], #88]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #80]\n\t" - "strd r4, r5, [%[a], #88]\n\t" - "ldrd r2, r3, [%[a], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" - "ldrd r8, r9, [%[b], #104]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #96]\n\t" - "strd r4, r5, [%[a], #104]\n\t" - "ldrd r2, r3, [%[a], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" - "ldrd r8, r9, [%[b], #120]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #112]\n\t" - "strd r4, r5, [%[a], #120]\n\t" - "ldrd r2, r3, [%[a], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" - "ldrd r8, r9, [%[b], #136]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #128]\n\t" - "strd r4, r5, [%[a], #136]\n\t" - "ldrd r2, r3, [%[a], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" - "ldrd r8, r9, [%[b], #152]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #144]\n\t" - "strd r4, r5, [%[a], #152]\n\t" - "ldrd r2, r3, [%[a], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" - "ldrd r8, r9, [%[b], #168]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #160]\n\t" - "strd r4, r5, [%[a], #168]\n\t" - "ldrd r2, r3, [%[a], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" - "ldrd r8, r9, [%[b], #184]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #176]\n\t" - "strd r4, r5, [%[a], #184]\n\t" - "ldrd r2, r3, [%[a], #192]\n\t" - "ldrd r4, r5, [%[a], #200]\n\t" - "ldrd r6, r7, [%[b], #192]\n\t" - "ldrd r8, r9, [%[b], #200]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #192]\n\t" - "strd r4, r5, [%[a], #200]\n\t" - "ldrd r2, r3, [%[a], #208]\n\t" - "ldrd r4, r5, [%[a], #216]\n\t" - "ldrd r6, r7, [%[b], #208]\n\t" - "ldrd r8, r9, [%[b], #216]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #208]\n\t" - "strd r4, r5, [%[a], #216]\n\t" - "ldrd r2, r3, [%[a], #224]\n\t" - "ldrd r4, r5, [%[a], #232]\n\t" - "ldrd r6, r7, [%[b], #224]\n\t" - "ldrd r8, r9, [%[b], #232]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #224]\n\t" - "strd r4, r5, [%[a], #232]\n\t" - "ldrd r2, r3, [%[a], #240]\n\t" - "ldrd r4, r5, [%[a], #248]\n\t" - "ldrd r6, r7, [%[b], #240]\n\t" - "ldrd r8, r9, [%[b], #248]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #240]\n\t" - "strd r4, r5, [%[a], #248]\n\t" - "ldrd r2, r3, [%[a], #256]\n\t" - "ldrd r4, r5, [%[a], #264]\n\t" - "ldrd r6, r7, [%[b], #256]\n\t" - "ldrd r8, r9, [%[b], #264]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #256]\n\t" - "strd r4, r5, [%[a], #264]\n\t" - "ldrd r2, r3, [%[a], #272]\n\t" - "ldrd r4, r5, [%[a], #280]\n\t" - "ldrd r6, r7, [%[b], #272]\n\t" - "ldrd r8, r9, [%[b], #280]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #272]\n\t" - "strd r4, r5, [%[a], #280]\n\t" - "ldrd r2, r3, [%[a], #288]\n\t" - "ldrd r4, r5, [%[a], #296]\n\t" - "ldrd r6, r7, [%[b], #288]\n\t" - "ldrd r8, r9, [%[b], #296]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #288]\n\t" - "strd r4, r5, [%[a], #296]\n\t" - "ldrd r2, r3, [%[a], #304]\n\t" - "ldrd r4, r5, [%[a], #312]\n\t" - "ldrd r6, r7, [%[b], #304]\n\t" - "ldrd r8, r9, [%[b], #312]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #304]\n\t" - "strd r4, r5, [%[a], #312]\n\t" - "ldrd r2, r3, [%[a], #320]\n\t" - "ldrd r4, r5, [%[a], #328]\n\t" - "ldrd r6, r7, [%[b], #320]\n\t" - "ldrd r8, r9, [%[b], #328]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #320]\n\t" - "strd r4, r5, [%[a], #328]\n\t" - "ldrd r2, r3, [%[a], #336]\n\t" - "ldrd r4, r5, [%[a], #344]\n\t" - "ldrd r6, r7, [%[b], #336]\n\t" - "ldrd r8, r9, [%[b], #344]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #336]\n\t" - "strd r4, r5, [%[a], #344]\n\t" - "ldrd r2, r3, [%[a], #352]\n\t" - "ldrd r4, r5, [%[a], #360]\n\t" - "ldrd r6, r7, [%[b], #352]\n\t" - "ldrd r8, r9, [%[b], #360]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #352]\n\t" - "strd r4, r5, [%[a], #360]\n\t" - "ldrd r2, r3, [%[a], #368]\n\t" - "ldrd r4, r5, [%[a], #376]\n\t" - "ldrd r6, r7, [%[b], #368]\n\t" - "ldrd r8, r9, [%[b], #376]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #368]\n\t" - "strd r4, r5, [%[a], #376]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -10605,250 +10138,178 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" - "ldrd r3, r4, [%[a], #192]\n\t" - "ldrd r5, r6, [%[a], #200]\n\t" - "ldrd r7, r8, [%[b], #192]\n\t" - "ldrd r9, r10, [%[b], #200]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #192]\n\t" - "strd r5, r6, [%[r], #200]\n\t" - "ldrd r3, r4, [%[a], #208]\n\t" - "ldrd r5, r6, [%[a], #216]\n\t" - "ldrd r7, r8, [%[b], #208]\n\t" - "ldrd r9, r10, [%[b], #216]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #208]\n\t" - "strd r5, r6, [%[r], #216]\n\t" - "ldrd r3, r4, [%[a], #224]\n\t" - "ldrd r5, r6, [%[a], #232]\n\t" - "ldrd r7, r8, [%[b], #224]\n\t" - "ldrd r9, r10, [%[b], #232]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #224]\n\t" - "strd r5, r6, [%[r], #232]\n\t" - "ldrd r3, r4, [%[a], #240]\n\t" - "ldrd r5, r6, [%[a], #248]\n\t" - "ldrd r7, r8, [%[b], #240]\n\t" - "ldrd r9, r10, [%[b], #248]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #240]\n\t" - "strd r5, r6, [%[r], #248]\n\t" - "ldrd r3, r4, [%[a], #256]\n\t" - "ldrd r5, r6, [%[a], #264]\n\t" - "ldrd r7, r8, [%[b], #256]\n\t" - "ldrd r9, r10, [%[b], #264]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #256]\n\t" - "strd r5, r6, [%[r], #264]\n\t" - "ldrd r3, r4, [%[a], #272]\n\t" - "ldrd r5, r6, [%[a], #280]\n\t" - "ldrd r7, r8, [%[b], #272]\n\t" - "ldrd r9, r10, [%[b], #280]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #272]\n\t" - "strd r5, r6, [%[r], #280]\n\t" - "ldrd r3, r4, [%[a], #288]\n\t" - "ldrd r5, r6, [%[a], #296]\n\t" - "ldrd r7, r8, [%[b], #288]\n\t" - "ldrd r9, r10, [%[b], #296]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #288]\n\t" - "strd r5, r6, [%[r], #296]\n\t" - "ldrd r3, r4, [%[a], #304]\n\t" - "ldrd r5, r6, [%[a], #312]\n\t" - "ldrd r7, r8, [%[b], #304]\n\t" - "ldrd r9, r10, [%[b], #312]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #304]\n\t" - "strd r5, r6, [%[r], #312]\n\t" - "ldrd r3, r4, [%[a], #320]\n\t" - "ldrd r5, r6, [%[a], #328]\n\t" - "ldrd r7, r8, [%[b], #320]\n\t" - "ldrd r9, r10, [%[b], #328]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #320]\n\t" - "strd r5, r6, [%[r], #328]\n\t" - "ldrd r3, r4, [%[a], #336]\n\t" - "ldrd r5, r6, [%[a], #344]\n\t" - "ldrd r7, r8, [%[b], #336]\n\t" - "ldrd r9, r10, [%[b], #344]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #336]\n\t" - "strd r5, r6, [%[r], #344]\n\t" - "ldrd r3, r4, [%[a], #352]\n\t" - "ldrd r5, r6, [%[a], #360]\n\t" - "ldrd r7, r8, [%[b], #352]\n\t" - "ldrd r9, r10, [%[b], #360]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #352]\n\t" - "strd r5, r6, [%[r], #360]\n\t" - "ldrd r3, r4, [%[a], #368]\n\t" - "ldrd r5, r6, [%[a], #376]\n\t" - "ldrd r7, r8, [%[b], #368]\n\t" - "ldrd r9, r10, [%[b], #376]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #368]\n\t" - "strd r5, r6, [%[r], #376]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -11614,40 +11075,31 @@ static sp_digit sp_3072_sub_12(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -11701,70 +11153,52 @@ static sp_digit sp_3072_sub_24(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -11818,130 +11252,94 @@ static sp_digit sp_3072_sub_48(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -12000,23 +11398,20 @@ static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a, "add r14, %[a], #384\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -12038,22 +11433,19 @@ static sp_digit sp_3072_sub_in_place_96(sp_digit* a, const sp_digit* b) "add r12, %[a], #384\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -12227,23 +11619,20 @@ static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a, "add r14, %[a], #192\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -12265,22 +11654,19 @@ static sp_digit sp_3072_sub_in_place_48(sp_digit* a, const sp_digit* b) "add r12, %[a], #192\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -12483,677 +11869,677 @@ static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" + "str r5, [%[r]], #4\n\t" "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" + "str r3, [%[r]], #4\n\t" "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" + "str r4, [%[r]], #4\n\t" "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" + "str r5, [%[r]], #4\n\t" "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" + "str r3, [%[r]], #4\n\t" "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" + "str r4, [%[r]], #4\n\t" "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" + "str r5, [%[r]], #4\n\t" "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" + "str r3, [%[r]], #4\n\t" "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" + "str r4, [%[r]], #4\n\t" "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" + "str r5, [%[r]], #4\n\t" "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" + "str r3, [%[r]], #4\n\t" "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" + "str r4, [%[r]], #4\n\t" "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" + "str r5, [%[r]], #4\n\t" "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" + "str r3, [%[r]], #4\n\t" "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" + "str r4, [%[r]], #4\n\t" "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" + "str r5, [%[r]], #4\n\t" "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" + "str r3, [%[r]], #4\n\t" "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" + "str r4, [%[r]], #4\n\t" "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" + "str r5, [%[r]], #4\n\t" "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" + "str r3, [%[r]], #4\n\t" "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" + "str r4, [%[r]], #4\n\t" "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" + "str r5, [%[r]], #4\n\t" "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" + "str r3, [%[r]], #4\n\t" "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" + "str r4, [%[r]], #4\n\t" "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" + "str r5, [%[r]], #4\n\t" "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" + "str r3, [%[r]], #4\n\t" "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" + "str r4, [%[r]], #4\n\t" "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" + "str r5, [%[r]], #4\n\t" "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" + "str r3, [%[r]], #4\n\t" "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" + "str r4, [%[r]], #4\n\t" "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" + "str r5, [%[r]], #4\n\t" "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" + "str r3, [%[r]], #4\n\t" "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" + "str r4, [%[r]], #4\n\t" "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" + "str r5, [%[r]], #4\n\t" "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" + "str r3, [%[r]], #4\n\t" "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" + "str r4, [%[r]], #4\n\t" "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #188]\n\t" + "str r5, [%[r]], #4\n\t" "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #192]\n\t" + "str r3, [%[r]], #4\n\t" "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #196]\n\t" + "str r4, [%[r]], #4\n\t" "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #200]\n\t" + "str r5, [%[r]], #4\n\t" "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #204]\n\t" + "str r3, [%[r]], #4\n\t" "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #208]\n\t" + "str r4, [%[r]], #4\n\t" "# A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #212]\n\t" + "str r5, [%[r]], #4\n\t" "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #216]\n\t" + "str r3, [%[r]], #4\n\t" "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #220]\n\t" + "str r4, [%[r]], #4\n\t" "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #224]\n\t" + "str r5, [%[r]], #4\n\t" "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #228]\n\t" + "str r3, [%[r]], #4\n\t" "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #232]\n\t" + "str r4, [%[r]], #4\n\t" "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #236]\n\t" + "str r5, [%[r]], #4\n\t" "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #240]\n\t" + "str r3, [%[r]], #4\n\t" "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #244]\n\t" + "str r4, [%[r]], #4\n\t" "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #248]\n\t" + "str r5, [%[r]], #4\n\t" "# A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #252]\n\t" + "str r3, [%[r]], #4\n\t" "# A[64] * B\n\t" - "ldr r8, [%[a], #256]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #256]\n\t" + "str r4, [%[r]], #4\n\t" "# A[65] * B\n\t" - "ldr r8, [%[a], #260]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #260]\n\t" + "str r5, [%[r]], #4\n\t" "# A[66] * B\n\t" - "ldr r8, [%[a], #264]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #264]\n\t" + "str r3, [%[r]], #4\n\t" "# A[67] * B\n\t" - "ldr r8, [%[a], #268]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #268]\n\t" + "str r4, [%[r]], #4\n\t" "# A[68] * B\n\t" - "ldr r8, [%[a], #272]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #272]\n\t" + "str r5, [%[r]], #4\n\t" "# A[69] * B\n\t" - "ldr r8, [%[a], #276]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #276]\n\t" + "str r3, [%[r]], #4\n\t" "# A[70] * B\n\t" - "ldr r8, [%[a], #280]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #280]\n\t" + "str r4, [%[r]], #4\n\t" "# A[71] * B\n\t" - "ldr r8, [%[a], #284]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #284]\n\t" + "str r5, [%[r]], #4\n\t" "# A[72] * B\n\t" - "ldr r8, [%[a], #288]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #288]\n\t" + "str r3, [%[r]], #4\n\t" "# A[73] * B\n\t" - "ldr r8, [%[a], #292]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #292]\n\t" + "str r4, [%[r]], #4\n\t" "# A[74] * B\n\t" - "ldr r8, [%[a], #296]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #296]\n\t" + "str r5, [%[r]], #4\n\t" "# A[75] * B\n\t" - "ldr r8, [%[a], #300]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #300]\n\t" + "str r3, [%[r]], #4\n\t" "# A[76] * B\n\t" - "ldr r8, [%[a], #304]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #304]\n\t" + "str r4, [%[r]], #4\n\t" "# A[77] * B\n\t" - "ldr r8, [%[a], #308]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #308]\n\t" + "str r5, [%[r]], #4\n\t" "# A[78] * B\n\t" - "ldr r8, [%[a], #312]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #312]\n\t" + "str r3, [%[r]], #4\n\t" "# A[79] * B\n\t" - "ldr r8, [%[a], #316]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #316]\n\t" + "str r4, [%[r]], #4\n\t" "# A[80] * B\n\t" - "ldr r8, [%[a], #320]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #320]\n\t" + "str r5, [%[r]], #4\n\t" "# A[81] * B\n\t" - "ldr r8, [%[a], #324]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #324]\n\t" + "str r3, [%[r]], #4\n\t" "# A[82] * B\n\t" - "ldr r8, [%[a], #328]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #328]\n\t" + "str r4, [%[r]], #4\n\t" "# A[83] * B\n\t" - "ldr r8, [%[a], #332]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #332]\n\t" + "str r5, [%[r]], #4\n\t" "# A[84] * B\n\t" - "ldr r8, [%[a], #336]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #336]\n\t" + "str r3, [%[r]], #4\n\t" "# A[85] * B\n\t" - "ldr r8, [%[a], #340]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #340]\n\t" + "str r4, [%[r]], #4\n\t" "# A[86] * B\n\t" - "ldr r8, [%[a], #344]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #344]\n\t" + "str r5, [%[r]], #4\n\t" "# A[87] * B\n\t" - "ldr r8, [%[a], #348]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #348]\n\t" + "str r3, [%[r]], #4\n\t" "# A[88] * B\n\t" - "ldr r8, [%[a], #352]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #352]\n\t" + "str r4, [%[r]], #4\n\t" "# A[89] * B\n\t" - "ldr r8, [%[a], #356]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #356]\n\t" + "str r5, [%[r]], #4\n\t" "# A[90] * B\n\t" - "ldr r8, [%[a], #360]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #360]\n\t" + "str r3, [%[r]], #4\n\t" "# A[91] * B\n\t" - "ldr r8, [%[a], #364]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #364]\n\t" + "str r4, [%[r]], #4\n\t" "# A[92] * B\n\t" - "ldr r8, [%[a], #368]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #368]\n\t" + "str r5, [%[r]], #4\n\t" "# A[93] * B\n\t" - "ldr r8, [%[a], #372]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #372]\n\t" + "str r3, [%[r]], #4\n\t" "# A[94] * B\n\t" - "ldr r8, [%[a], #376]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #376]\n\t" + "str r4, [%[r]], #4\n\t" "# A[95] * B\n\t" - "ldr r8, [%[a], #380]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" - "str r5, [%[r], #380]\n\t" - "str r3, [%[r], #384]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r5, [%[r]], #4\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -13210,177 +12596,177 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #72]\n\t" - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #88]\n\t" - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #104]\n\t" - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #120]\n\t" - "ldrd r4, r5, [%[a], #128]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #136]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #136]\n\t" - "ldrd r4, r5, [%[a], #144]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #152]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #152]\n\t" - "ldrd r4, r5, [%[a], #160]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #168]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #168]\n\t" - "ldrd r4, r5, [%[a], #176]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #184]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #184]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -13925,341 +13311,341 @@ static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" + "str r5, [%[r]], #4\n\t" "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" + "str r3, [%[r]], #4\n\t" "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" + "str r4, [%[r]], #4\n\t" "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" + "str r5, [%[r]], #4\n\t" "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" + "str r3, [%[r]], #4\n\t" "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" + "str r4, [%[r]], #4\n\t" "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" + "str r5, [%[r]], #4\n\t" "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" + "str r3, [%[r]], #4\n\t" "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" + "str r4, [%[r]], #4\n\t" "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" + "str r5, [%[r]], #4\n\t" "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" + "str r3, [%[r]], #4\n\t" "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" + "str r4, [%[r]], #4\n\t" "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" + "str r5, [%[r]], #4\n\t" "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" + "str r3, [%[r]], #4\n\t" "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" + "str r4, [%[r]], #4\n\t" "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" + "str r5, [%[r]], #4\n\t" "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" + "str r3, [%[r]], #4\n\t" "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" + "str r4, [%[r]], #4\n\t" "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" + "str r5, [%[r]], #4\n\t" "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" + "str r3, [%[r]], #4\n\t" "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" + "str r4, [%[r]], #4\n\t" "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" + "str r5, [%[r]], #4\n\t" "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" + "str r3, [%[r]], #4\n\t" "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" + "str r4, [%[r]], #4\n\t" "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" + "str r5, [%[r]], #4\n\t" "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" + "str r3, [%[r]], #4\n\t" "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" + "str r4, [%[r]], #4\n\t" "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" + "str r5, [%[r]], #4\n\t" "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" + "str r3, [%[r]], #4\n\t" "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" + "str r4, [%[r]], #4\n\t" "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" + "str r5, [%[r]], #4\n\t" "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" + "str r3, [%[r]], #4\n\t" "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" + "str r4, [%[r]], #4\n\t" "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" + "str r5, [%[r]], #4\n\t" "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" + "str r3, [%[r]], #4\n\t" "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" + "str r4, [%[r]], #4\n\t" "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" - "str r5, [%[r], #188]\n\t" - "str r3, [%[r], #192]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r5, [%[r]], #4\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -15334,345 +14720,345 @@ static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #72]\n\t" - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #88]\n\t" - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #104]\n\t" - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #120]\n\t" - "ldrd r4, r5, [%[a], #128]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #136]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #136]\n\t" - "ldrd r4, r5, [%[a], #144]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #152]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #152]\n\t" - "ldrd r4, r5, [%[a], #160]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #168]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #168]\n\t" - "ldrd r4, r5, [%[a], #176]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #184]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #184]\n\t" - "ldrd r4, r5, [%[a], #192]\n\t" - "ldrd r6, r7, [%[b], #192]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #192]\n\t" - "ldrd r4, r5, [%[a], #200]\n\t" - "ldrd r6, r7, [%[b], #200]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #200]\n\t" - "ldrd r4, r5, [%[a], #208]\n\t" - "ldrd r6, r7, [%[b], #208]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #208]\n\t" - "ldrd r4, r5, [%[a], #216]\n\t" - "ldrd r6, r7, [%[b], #216]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #216]\n\t" - "ldrd r4, r5, [%[a], #224]\n\t" - "ldrd r6, r7, [%[b], #224]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #224]\n\t" - "ldrd r4, r5, [%[a], #232]\n\t" - "ldrd r6, r7, [%[b], #232]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #232]\n\t" - "ldrd r4, r5, [%[a], #240]\n\t" - "ldrd r6, r7, [%[b], #240]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #240]\n\t" - "ldrd r4, r5, [%[a], #248]\n\t" - "ldrd r6, r7, [%[b], #248]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #248]\n\t" - "ldrd r4, r5, [%[a], #256]\n\t" - "ldrd r6, r7, [%[b], #256]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #256]\n\t" - "ldrd r4, r5, [%[a], #264]\n\t" - "ldrd r6, r7, [%[b], #264]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #264]\n\t" - "ldrd r4, r5, [%[a], #272]\n\t" - "ldrd r6, r7, [%[b], #272]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #272]\n\t" - "ldrd r4, r5, [%[a], #280]\n\t" - "ldrd r6, r7, [%[b], #280]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #280]\n\t" - "ldrd r4, r5, [%[a], #288]\n\t" - "ldrd r6, r7, [%[b], #288]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #288]\n\t" - "ldrd r4, r5, [%[a], #296]\n\t" - "ldrd r6, r7, [%[b], #296]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #296]\n\t" - "ldrd r4, r5, [%[a], #304]\n\t" - "ldrd r6, r7, [%[b], #304]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #304]\n\t" - "ldrd r4, r5, [%[a], #312]\n\t" - "ldrd r6, r7, [%[b], #312]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #312]\n\t" - "ldrd r4, r5, [%[a], #320]\n\t" - "ldrd r6, r7, [%[b], #320]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #320]\n\t" - "ldrd r4, r5, [%[a], #328]\n\t" - "ldrd r6, r7, [%[b], #328]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #328]\n\t" - "ldrd r4, r5, [%[a], #336]\n\t" - "ldrd r6, r7, [%[b], #336]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #336]\n\t" - "ldrd r4, r5, [%[a], #344]\n\t" - "ldrd r6, r7, [%[b], #344]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #344]\n\t" - "ldrd r4, r5, [%[a], #352]\n\t" - "ldrd r6, r7, [%[b], #352]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #352]\n\t" - "ldrd r4, r5, [%[a], #360]\n\t" - "ldrd r6, r7, [%[b], #360]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #360]\n\t" - "ldrd r4, r5, [%[a], #368]\n\t" - "ldrd r6, r7, [%[b], #368]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #368]\n\t" - "ldrd r4, r5, [%[a], #376]\n\t" - "ldrd r6, r7, [%[b], #376]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #376]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -16624,22 +16010,19 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, "add r14, %[a], #384\n\t" "\n1:\n\t" "rsbs %[c], %[c], #0\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "sbc %[c], r3, r3\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r4, r4\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -16658,250 +16041,178 @@ static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" - "ldrd r3, r4, [%[a], #192]\n\t" - "ldrd r5, r6, [%[a], #200]\n\t" - "ldrd r7, r8, [%[b], #192]\n\t" - "ldrd r9, r10, [%[b], #200]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #192]\n\t" - "strd r5, r6, [%[r], #200]\n\t" - "ldrd r3, r4, [%[a], #208]\n\t" - "ldrd r5, r6, [%[a], #216]\n\t" - "ldrd r7, r8, [%[b], #208]\n\t" - "ldrd r9, r10, [%[b], #216]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #208]\n\t" - "strd r5, r6, [%[r], #216]\n\t" - "ldrd r3, r4, [%[a], #224]\n\t" - "ldrd r5, r6, [%[a], #232]\n\t" - "ldrd r7, r8, [%[b], #224]\n\t" - "ldrd r9, r10, [%[b], #232]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #224]\n\t" - "strd r5, r6, [%[r], #232]\n\t" - "ldrd r3, r4, [%[a], #240]\n\t" - "ldrd r5, r6, [%[a], #248]\n\t" - "ldrd r7, r8, [%[b], #240]\n\t" - "ldrd r9, r10, [%[b], #248]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #240]\n\t" - "strd r5, r6, [%[r], #248]\n\t" - "ldrd r3, r4, [%[a], #256]\n\t" - "ldrd r5, r6, [%[a], #264]\n\t" - "ldrd r7, r8, [%[b], #256]\n\t" - "ldrd r9, r10, [%[b], #264]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #256]\n\t" - "strd r5, r6, [%[r], #264]\n\t" - "ldrd r3, r4, [%[a], #272]\n\t" - "ldrd r5, r6, [%[a], #280]\n\t" - "ldrd r7, r8, [%[b], #272]\n\t" - "ldrd r9, r10, [%[b], #280]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #272]\n\t" - "strd r5, r6, [%[r], #280]\n\t" - "ldrd r3, r4, [%[a], #288]\n\t" - "ldrd r5, r6, [%[a], #296]\n\t" - "ldrd r7, r8, [%[b], #288]\n\t" - "ldrd r9, r10, [%[b], #296]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #288]\n\t" - "strd r5, r6, [%[r], #296]\n\t" - "ldrd r3, r4, [%[a], #304]\n\t" - "ldrd r5, r6, [%[a], #312]\n\t" - "ldrd r7, r8, [%[b], #304]\n\t" - "ldrd r9, r10, [%[b], #312]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #304]\n\t" - "strd r5, r6, [%[r], #312]\n\t" - "ldrd r3, r4, [%[a], #320]\n\t" - "ldrd r5, r6, [%[a], #328]\n\t" - "ldrd r7, r8, [%[b], #320]\n\t" - "ldrd r9, r10, [%[b], #328]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #320]\n\t" - "strd r5, r6, [%[r], #328]\n\t" - "ldrd r3, r4, [%[a], #336]\n\t" - "ldrd r5, r6, [%[a], #344]\n\t" - "ldrd r7, r8, [%[b], #336]\n\t" - "ldrd r9, r10, [%[b], #344]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #336]\n\t" - "strd r5, r6, [%[r], #344]\n\t" - "ldrd r3, r4, [%[a], #352]\n\t" - "ldrd r5, r6, [%[a], #360]\n\t" - "ldrd r7, r8, [%[b], #352]\n\t" - "ldrd r9, r10, [%[b], #360]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #352]\n\t" - "strd r5, r6, [%[r], #360]\n\t" - "ldrd r3, r4, [%[a], #368]\n\t" - "ldrd r5, r6, [%[a], #376]\n\t" - "ldrd r7, r8, [%[b], #368]\n\t" - "ldrd r9, r10, [%[b], #376]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #368]\n\t" - "strd r5, r6, [%[r], #376]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -19812,7 +19123,8 @@ static void sp_3072_lshift_96(sp_digit* r, const sp_digit* a, byte n) "lsl r4, r4, %[n]\n\t" "lsr r5, r5, r6\n\t" "orr r2, r2, r5\n\t" - "strd r4, r2, [%[r]]\n\t" + "str r4, [%[r], #0]\n\t" + "str r2, [%[r], #4]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n) : "memory", "r2", "r3", "r4", "r5", "r6" @@ -20236,330 +19548,234 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldrd r2, r3, [%[a], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" - "ldrd r8, r9, [%[b], #72]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #64]\n\t" - "strd r4, r5, [%[a], #72]\n\t" - "ldrd r2, r3, [%[a], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" - "ldrd r8, r9, [%[b], #88]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #80]\n\t" - "strd r4, r5, [%[a], #88]\n\t" - "ldrd r2, r3, [%[a], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" - "ldrd r8, r9, [%[b], #104]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #96]\n\t" - "strd r4, r5, [%[a], #104]\n\t" - "ldrd r2, r3, [%[a], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" - "ldrd r8, r9, [%[b], #120]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #112]\n\t" - "strd r4, r5, [%[a], #120]\n\t" - "ldrd r2, r3, [%[a], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" - "ldrd r8, r9, [%[b], #136]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #128]\n\t" - "strd r4, r5, [%[a], #136]\n\t" - "ldrd r2, r3, [%[a], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" - "ldrd r8, r9, [%[b], #152]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #144]\n\t" - "strd r4, r5, [%[a], #152]\n\t" - "ldrd r2, r3, [%[a], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" - "ldrd r8, r9, [%[b], #168]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #160]\n\t" - "strd r4, r5, [%[a], #168]\n\t" - "ldrd r2, r3, [%[a], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" - "ldrd r8, r9, [%[b], #184]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #176]\n\t" - "strd r4, r5, [%[a], #184]\n\t" - "ldrd r2, r3, [%[a], #192]\n\t" - "ldrd r4, r5, [%[a], #200]\n\t" - "ldrd r6, r7, [%[b], #192]\n\t" - "ldrd r8, r9, [%[b], #200]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #192]\n\t" - "strd r4, r5, [%[a], #200]\n\t" - "ldrd r2, r3, [%[a], #208]\n\t" - "ldrd r4, r5, [%[a], #216]\n\t" - "ldrd r6, r7, [%[b], #208]\n\t" - "ldrd r8, r9, [%[b], #216]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #208]\n\t" - "strd r4, r5, [%[a], #216]\n\t" - "ldrd r2, r3, [%[a], #224]\n\t" - "ldrd r4, r5, [%[a], #232]\n\t" - "ldrd r6, r7, [%[b], #224]\n\t" - "ldrd r8, r9, [%[b], #232]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #224]\n\t" - "strd r4, r5, [%[a], #232]\n\t" - "ldrd r2, r3, [%[a], #240]\n\t" - "ldrd r4, r5, [%[a], #248]\n\t" - "ldrd r6, r7, [%[b], #240]\n\t" - "ldrd r8, r9, [%[b], #248]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #240]\n\t" - "strd r4, r5, [%[a], #248]\n\t" - "ldrd r2, r3, [%[a], #256]\n\t" - "ldrd r4, r5, [%[a], #264]\n\t" - "ldrd r6, r7, [%[b], #256]\n\t" - "ldrd r8, r9, [%[b], #264]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #256]\n\t" - "strd r4, r5, [%[a], #264]\n\t" - "ldrd r2, r3, [%[a], #272]\n\t" - "ldrd r4, r5, [%[a], #280]\n\t" - "ldrd r6, r7, [%[b], #272]\n\t" - "ldrd r8, r9, [%[b], #280]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #272]\n\t" - "strd r4, r5, [%[a], #280]\n\t" - "ldrd r2, r3, [%[a], #288]\n\t" - "ldrd r4, r5, [%[a], #296]\n\t" - "ldrd r6, r7, [%[b], #288]\n\t" - "ldrd r8, r9, [%[b], #296]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #288]\n\t" - "strd r4, r5, [%[a], #296]\n\t" - "ldrd r2, r3, [%[a], #304]\n\t" - "ldrd r4, r5, [%[a], #312]\n\t" - "ldrd r6, r7, [%[b], #304]\n\t" - "ldrd r8, r9, [%[b], #312]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #304]\n\t" - "strd r4, r5, [%[a], #312]\n\t" - "ldrd r2, r3, [%[a], #320]\n\t" - "ldrd r4, r5, [%[a], #328]\n\t" - "ldrd r6, r7, [%[b], #320]\n\t" - "ldrd r8, r9, [%[b], #328]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #320]\n\t" - "strd r4, r5, [%[a], #328]\n\t" - "ldrd r2, r3, [%[a], #336]\n\t" - "ldrd r4, r5, [%[a], #344]\n\t" - "ldrd r6, r7, [%[b], #336]\n\t" - "ldrd r8, r9, [%[b], #344]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #336]\n\t" - "strd r4, r5, [%[a], #344]\n\t" - "ldrd r2, r3, [%[a], #352]\n\t" - "ldrd r4, r5, [%[a], #360]\n\t" - "ldrd r6, r7, [%[b], #352]\n\t" - "ldrd r8, r9, [%[b], #360]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #352]\n\t" - "strd r4, r5, [%[a], #360]\n\t" - "ldrd r2, r3, [%[a], #368]\n\t" - "ldrd r4, r5, [%[a], #376]\n\t" - "ldrd r6, r7, [%[b], #368]\n\t" - "ldrd r8, r9, [%[b], #376]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #368]\n\t" - "strd r4, r5, [%[a], #376]\n\t" - "ldrd r2, r3, [%[a], #384]\n\t" - "ldrd r4, r5, [%[a], #392]\n\t" - "ldrd r6, r7, [%[b], #384]\n\t" - "ldrd r8, r9, [%[b], #392]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #384]\n\t" - "strd r4, r5, [%[a], #392]\n\t" - "ldrd r2, r3, [%[a], #400]\n\t" - "ldrd r4, r5, [%[a], #408]\n\t" - "ldrd r6, r7, [%[b], #400]\n\t" - "ldrd r8, r9, [%[b], #408]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #400]\n\t" - "strd r4, r5, [%[a], #408]\n\t" - "ldrd r2, r3, [%[a], #416]\n\t" - "ldrd r4, r5, [%[a], #424]\n\t" - "ldrd r6, r7, [%[b], #416]\n\t" - "ldrd r8, r9, [%[b], #424]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #416]\n\t" - "strd r4, r5, [%[a], #424]\n\t" - "ldrd r2, r3, [%[a], #432]\n\t" - "ldrd r4, r5, [%[a], #440]\n\t" - "ldrd r6, r7, [%[b], #432]\n\t" - "ldrd r8, r9, [%[b], #440]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #432]\n\t" - "strd r4, r5, [%[a], #440]\n\t" - "ldrd r2, r3, [%[a], #448]\n\t" - "ldrd r4, r5, [%[a], #456]\n\t" - "ldrd r6, r7, [%[b], #448]\n\t" - "ldrd r8, r9, [%[b], #456]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #448]\n\t" - "strd r4, r5, [%[a], #456]\n\t" - "ldrd r2, r3, [%[a], #464]\n\t" - "ldrd r4, r5, [%[a], #472]\n\t" - "ldrd r6, r7, [%[b], #464]\n\t" - "ldrd r8, r9, [%[b], #472]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #464]\n\t" - "strd r4, r5, [%[a], #472]\n\t" - "ldrd r2, r3, [%[a], #480]\n\t" - "ldrd r4, r5, [%[a], #488]\n\t" - "ldrd r6, r7, [%[b], #480]\n\t" - "ldrd r8, r9, [%[b], #488]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #480]\n\t" - "strd r4, r5, [%[a], #488]\n\t" - "ldrd r2, r3, [%[a], #496]\n\t" - "ldrd r4, r5, [%[a], #504]\n\t" - "ldrd r6, r7, [%[b], #496]\n\t" - "ldrd r8, r9, [%[b], #504]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #496]\n\t" - "strd r4, r5, [%[a], #504]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -20578,330 +19794,234 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" - "ldrd r3, r4, [%[a], #192]\n\t" - "ldrd r5, r6, [%[a], #200]\n\t" - "ldrd r7, r8, [%[b], #192]\n\t" - "ldrd r9, r10, [%[b], #200]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #192]\n\t" - "strd r5, r6, [%[r], #200]\n\t" - "ldrd r3, r4, [%[a], #208]\n\t" - "ldrd r5, r6, [%[a], #216]\n\t" - "ldrd r7, r8, [%[b], #208]\n\t" - "ldrd r9, r10, [%[b], #216]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #208]\n\t" - "strd r5, r6, [%[r], #216]\n\t" - "ldrd r3, r4, [%[a], #224]\n\t" - "ldrd r5, r6, [%[a], #232]\n\t" - "ldrd r7, r8, [%[b], #224]\n\t" - "ldrd r9, r10, [%[b], #232]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #224]\n\t" - "strd r5, r6, [%[r], #232]\n\t" - "ldrd r3, r4, [%[a], #240]\n\t" - "ldrd r5, r6, [%[a], #248]\n\t" - "ldrd r7, r8, [%[b], #240]\n\t" - "ldrd r9, r10, [%[b], #248]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #240]\n\t" - "strd r5, r6, [%[r], #248]\n\t" - "ldrd r3, r4, [%[a], #256]\n\t" - "ldrd r5, r6, [%[a], #264]\n\t" - "ldrd r7, r8, [%[b], #256]\n\t" - "ldrd r9, r10, [%[b], #264]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #256]\n\t" - "strd r5, r6, [%[r], #264]\n\t" - "ldrd r3, r4, [%[a], #272]\n\t" - "ldrd r5, r6, [%[a], #280]\n\t" - "ldrd r7, r8, [%[b], #272]\n\t" - "ldrd r9, r10, [%[b], #280]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #272]\n\t" - "strd r5, r6, [%[r], #280]\n\t" - "ldrd r3, r4, [%[a], #288]\n\t" - "ldrd r5, r6, [%[a], #296]\n\t" - "ldrd r7, r8, [%[b], #288]\n\t" - "ldrd r9, r10, [%[b], #296]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #288]\n\t" - "strd r5, r6, [%[r], #296]\n\t" - "ldrd r3, r4, [%[a], #304]\n\t" - "ldrd r5, r6, [%[a], #312]\n\t" - "ldrd r7, r8, [%[b], #304]\n\t" - "ldrd r9, r10, [%[b], #312]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #304]\n\t" - "strd r5, r6, [%[r], #312]\n\t" - "ldrd r3, r4, [%[a], #320]\n\t" - "ldrd r5, r6, [%[a], #328]\n\t" - "ldrd r7, r8, [%[b], #320]\n\t" - "ldrd r9, r10, [%[b], #328]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #320]\n\t" - "strd r5, r6, [%[r], #328]\n\t" - "ldrd r3, r4, [%[a], #336]\n\t" - "ldrd r5, r6, [%[a], #344]\n\t" - "ldrd r7, r8, [%[b], #336]\n\t" - "ldrd r9, r10, [%[b], #344]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #336]\n\t" - "strd r5, r6, [%[r], #344]\n\t" - "ldrd r3, r4, [%[a], #352]\n\t" - "ldrd r5, r6, [%[a], #360]\n\t" - "ldrd r7, r8, [%[b], #352]\n\t" - "ldrd r9, r10, [%[b], #360]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #352]\n\t" - "strd r5, r6, [%[r], #360]\n\t" - "ldrd r3, r4, [%[a], #368]\n\t" - "ldrd r5, r6, [%[a], #376]\n\t" - "ldrd r7, r8, [%[b], #368]\n\t" - "ldrd r9, r10, [%[b], #376]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #368]\n\t" - "strd r5, r6, [%[r], #376]\n\t" - "ldrd r3, r4, [%[a], #384]\n\t" - "ldrd r5, r6, [%[a], #392]\n\t" - "ldrd r7, r8, [%[b], #384]\n\t" - "ldrd r9, r10, [%[b], #392]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #384]\n\t" - "strd r5, r6, [%[r], #392]\n\t" - "ldrd r3, r4, [%[a], #400]\n\t" - "ldrd r5, r6, [%[a], #408]\n\t" - "ldrd r7, r8, [%[b], #400]\n\t" - "ldrd r9, r10, [%[b], #408]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #400]\n\t" - "strd r5, r6, [%[r], #408]\n\t" - "ldrd r3, r4, [%[a], #416]\n\t" - "ldrd r5, r6, [%[a], #424]\n\t" - "ldrd r7, r8, [%[b], #416]\n\t" - "ldrd r9, r10, [%[b], #424]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #416]\n\t" - "strd r5, r6, [%[r], #424]\n\t" - "ldrd r3, r4, [%[a], #432]\n\t" - "ldrd r5, r6, [%[a], #440]\n\t" - "ldrd r7, r8, [%[b], #432]\n\t" - "ldrd r9, r10, [%[b], #440]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #432]\n\t" - "strd r5, r6, [%[r], #440]\n\t" - "ldrd r3, r4, [%[a], #448]\n\t" - "ldrd r5, r6, [%[a], #456]\n\t" - "ldrd r7, r8, [%[b], #448]\n\t" - "ldrd r9, r10, [%[b], #456]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #448]\n\t" - "strd r5, r6, [%[r], #456]\n\t" - "ldrd r3, r4, [%[a], #464]\n\t" - "ldrd r5, r6, [%[a], #472]\n\t" - "ldrd r7, r8, [%[b], #464]\n\t" - "ldrd r9, r10, [%[b], #472]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #464]\n\t" - "strd r5, r6, [%[r], #472]\n\t" - "ldrd r3, r4, [%[a], #480]\n\t" - "ldrd r5, r6, [%[a], #488]\n\t" - "ldrd r7, r8, [%[b], #480]\n\t" - "ldrd r9, r10, [%[b], #488]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #480]\n\t" - "strd r5, r6, [%[r], #488]\n\t" - "ldrd r3, r4, [%[a], #496]\n\t" - "ldrd r5, r6, [%[a], #504]\n\t" - "ldrd r7, r8, [%[b], #496]\n\t" - "ldrd r9, r10, [%[b], #504]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #496]\n\t" - "strd r5, r6, [%[r], #504]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -20999,23 +20119,20 @@ static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a, "add r14, %[a], #512\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -21037,22 +20154,19 @@ static sp_digit sp_4096_sub_in_place_128(sp_digit* a, const sp_digit* b) "add r12, %[a], #512\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -21253,901 +20367,901 @@ static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" + "str r5, [%[r]], #4\n\t" "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" + "str r3, [%[r]], #4\n\t" "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" + "str r4, [%[r]], #4\n\t" "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" + "str r5, [%[r]], #4\n\t" "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" + "str r3, [%[r]], #4\n\t" "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" + "str r4, [%[r]], #4\n\t" "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" + "str r5, [%[r]], #4\n\t" "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" + "str r3, [%[r]], #4\n\t" "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" + "str r4, [%[r]], #4\n\t" "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" + "str r5, [%[r]], #4\n\t" "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" + "str r3, [%[r]], #4\n\t" "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" + "str r4, [%[r]], #4\n\t" "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" + "str r5, [%[r]], #4\n\t" "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" + "str r3, [%[r]], #4\n\t" "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" + "str r4, [%[r]], #4\n\t" "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" + "str r5, [%[r]], #4\n\t" "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" + "str r3, [%[r]], #4\n\t" "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" + "str r4, [%[r]], #4\n\t" "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" + "str r5, [%[r]], #4\n\t" "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" + "str r3, [%[r]], #4\n\t" "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #124]\n\t" + "str r4, [%[r]], #4\n\t" "# A[32] * B\n\t" - "ldr r8, [%[a], #128]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #128]\n\t" + "str r5, [%[r]], #4\n\t" "# A[33] * B\n\t" - "ldr r8, [%[a], #132]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #132]\n\t" + "str r3, [%[r]], #4\n\t" "# A[34] * B\n\t" - "ldr r8, [%[a], #136]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #136]\n\t" + "str r4, [%[r]], #4\n\t" "# A[35] * B\n\t" - "ldr r8, [%[a], #140]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #140]\n\t" + "str r5, [%[r]], #4\n\t" "# A[36] * B\n\t" - "ldr r8, [%[a], #144]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #144]\n\t" + "str r3, [%[r]], #4\n\t" "# A[37] * B\n\t" - "ldr r8, [%[a], #148]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #148]\n\t" + "str r4, [%[r]], #4\n\t" "# A[38] * B\n\t" - "ldr r8, [%[a], #152]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #152]\n\t" + "str r5, [%[r]], #4\n\t" "# A[39] * B\n\t" - "ldr r8, [%[a], #156]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #156]\n\t" + "str r3, [%[r]], #4\n\t" "# A[40] * B\n\t" - "ldr r8, [%[a], #160]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #160]\n\t" + "str r4, [%[r]], #4\n\t" "# A[41] * B\n\t" - "ldr r8, [%[a], #164]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #164]\n\t" + "str r5, [%[r]], #4\n\t" "# A[42] * B\n\t" - "ldr r8, [%[a], #168]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #168]\n\t" + "str r3, [%[r]], #4\n\t" "# A[43] * B\n\t" - "ldr r8, [%[a], #172]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #172]\n\t" + "str r4, [%[r]], #4\n\t" "# A[44] * B\n\t" - "ldr r8, [%[a], #176]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #176]\n\t" + "str r5, [%[r]], #4\n\t" "# A[45] * B\n\t" - "ldr r8, [%[a], #180]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #180]\n\t" + "str r3, [%[r]], #4\n\t" "# A[46] * B\n\t" - "ldr r8, [%[a], #184]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #184]\n\t" + "str r4, [%[r]], #4\n\t" "# A[47] * B\n\t" - "ldr r8, [%[a], #188]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #188]\n\t" + "str r5, [%[r]], #4\n\t" "# A[48] * B\n\t" - "ldr r8, [%[a], #192]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #192]\n\t" + "str r3, [%[r]], #4\n\t" "# A[49] * B\n\t" - "ldr r8, [%[a], #196]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #196]\n\t" + "str r4, [%[r]], #4\n\t" "# A[50] * B\n\t" - "ldr r8, [%[a], #200]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #200]\n\t" + "str r5, [%[r]], #4\n\t" "# A[51] * B\n\t" - "ldr r8, [%[a], #204]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #204]\n\t" + "str r3, [%[r]], #4\n\t" "# A[52] * B\n\t" - "ldr r8, [%[a], #208]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #208]\n\t" + "str r4, [%[r]], #4\n\t" "# A[53] * B\n\t" - "ldr r8, [%[a], #212]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #212]\n\t" + "str r5, [%[r]], #4\n\t" "# A[54] * B\n\t" - "ldr r8, [%[a], #216]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #216]\n\t" + "str r3, [%[r]], #4\n\t" "# A[55] * B\n\t" - "ldr r8, [%[a], #220]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #220]\n\t" + "str r4, [%[r]], #4\n\t" "# A[56] * B\n\t" - "ldr r8, [%[a], #224]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #224]\n\t" + "str r5, [%[r]], #4\n\t" "# A[57] * B\n\t" - "ldr r8, [%[a], #228]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #228]\n\t" + "str r3, [%[r]], #4\n\t" "# A[58] * B\n\t" - "ldr r8, [%[a], #232]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #232]\n\t" + "str r4, [%[r]], #4\n\t" "# A[59] * B\n\t" - "ldr r8, [%[a], #236]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #236]\n\t" + "str r5, [%[r]], #4\n\t" "# A[60] * B\n\t" - "ldr r8, [%[a], #240]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #240]\n\t" + "str r3, [%[r]], #4\n\t" "# A[61] * B\n\t" - "ldr r8, [%[a], #244]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #244]\n\t" + "str r4, [%[r]], #4\n\t" "# A[62] * B\n\t" - "ldr r8, [%[a], #248]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #248]\n\t" + "str r5, [%[r]], #4\n\t" "# A[63] * B\n\t" - "ldr r8, [%[a], #252]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #252]\n\t" + "str r3, [%[r]], #4\n\t" "# A[64] * B\n\t" - "ldr r8, [%[a], #256]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #256]\n\t" + "str r4, [%[r]], #4\n\t" "# A[65] * B\n\t" - "ldr r8, [%[a], #260]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #260]\n\t" + "str r5, [%[r]], #4\n\t" "# A[66] * B\n\t" - "ldr r8, [%[a], #264]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #264]\n\t" + "str r3, [%[r]], #4\n\t" "# A[67] * B\n\t" - "ldr r8, [%[a], #268]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #268]\n\t" + "str r4, [%[r]], #4\n\t" "# A[68] * B\n\t" - "ldr r8, [%[a], #272]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #272]\n\t" + "str r5, [%[r]], #4\n\t" "# A[69] * B\n\t" - "ldr r8, [%[a], #276]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #276]\n\t" + "str r3, [%[r]], #4\n\t" "# A[70] * B\n\t" - "ldr r8, [%[a], #280]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #280]\n\t" + "str r4, [%[r]], #4\n\t" "# A[71] * B\n\t" - "ldr r8, [%[a], #284]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #284]\n\t" + "str r5, [%[r]], #4\n\t" "# A[72] * B\n\t" - "ldr r8, [%[a], #288]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #288]\n\t" + "str r3, [%[r]], #4\n\t" "# A[73] * B\n\t" - "ldr r8, [%[a], #292]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #292]\n\t" + "str r4, [%[r]], #4\n\t" "# A[74] * B\n\t" - "ldr r8, [%[a], #296]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #296]\n\t" + "str r5, [%[r]], #4\n\t" "# A[75] * B\n\t" - "ldr r8, [%[a], #300]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #300]\n\t" + "str r3, [%[r]], #4\n\t" "# A[76] * B\n\t" - "ldr r8, [%[a], #304]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #304]\n\t" + "str r4, [%[r]], #4\n\t" "# A[77] * B\n\t" - "ldr r8, [%[a], #308]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #308]\n\t" + "str r5, [%[r]], #4\n\t" "# A[78] * B\n\t" - "ldr r8, [%[a], #312]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #312]\n\t" + "str r3, [%[r]], #4\n\t" "# A[79] * B\n\t" - "ldr r8, [%[a], #316]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #316]\n\t" + "str r4, [%[r]], #4\n\t" "# A[80] * B\n\t" - "ldr r8, [%[a], #320]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #320]\n\t" + "str r5, [%[r]], #4\n\t" "# A[81] * B\n\t" - "ldr r8, [%[a], #324]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #324]\n\t" + "str r3, [%[r]], #4\n\t" "# A[82] * B\n\t" - "ldr r8, [%[a], #328]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #328]\n\t" + "str r4, [%[r]], #4\n\t" "# A[83] * B\n\t" - "ldr r8, [%[a], #332]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #332]\n\t" + "str r5, [%[r]], #4\n\t" "# A[84] * B\n\t" - "ldr r8, [%[a], #336]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #336]\n\t" + "str r3, [%[r]], #4\n\t" "# A[85] * B\n\t" - "ldr r8, [%[a], #340]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #340]\n\t" + "str r4, [%[r]], #4\n\t" "# A[86] * B\n\t" - "ldr r8, [%[a], #344]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #344]\n\t" + "str r5, [%[r]], #4\n\t" "# A[87] * B\n\t" - "ldr r8, [%[a], #348]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #348]\n\t" + "str r3, [%[r]], #4\n\t" "# A[88] * B\n\t" - "ldr r8, [%[a], #352]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #352]\n\t" + "str r4, [%[r]], #4\n\t" "# A[89] * B\n\t" - "ldr r8, [%[a], #356]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #356]\n\t" + "str r5, [%[r]], #4\n\t" "# A[90] * B\n\t" - "ldr r8, [%[a], #360]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #360]\n\t" + "str r3, [%[r]], #4\n\t" "# A[91] * B\n\t" - "ldr r8, [%[a], #364]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #364]\n\t" + "str r4, [%[r]], #4\n\t" "# A[92] * B\n\t" - "ldr r8, [%[a], #368]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #368]\n\t" + "str r5, [%[r]], #4\n\t" "# A[93] * B\n\t" - "ldr r8, [%[a], #372]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #372]\n\t" + "str r3, [%[r]], #4\n\t" "# A[94] * B\n\t" - "ldr r8, [%[a], #376]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #376]\n\t" + "str r4, [%[r]], #4\n\t" "# A[95] * B\n\t" - "ldr r8, [%[a], #380]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #380]\n\t" + "str r5, [%[r]], #4\n\t" "# A[96] * B\n\t" - "ldr r8, [%[a], #384]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #384]\n\t" + "str r3, [%[r]], #4\n\t" "# A[97] * B\n\t" - "ldr r8, [%[a], #388]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #388]\n\t" + "str r4, [%[r]], #4\n\t" "# A[98] * B\n\t" - "ldr r8, [%[a], #392]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #392]\n\t" + "str r5, [%[r]], #4\n\t" "# A[99] * B\n\t" - "ldr r8, [%[a], #396]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #396]\n\t" + "str r3, [%[r]], #4\n\t" "# A[100] * B\n\t" - "ldr r8, [%[a], #400]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #400]\n\t" + "str r4, [%[r]], #4\n\t" "# A[101] * B\n\t" - "ldr r8, [%[a], #404]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #404]\n\t" + "str r5, [%[r]], #4\n\t" "# A[102] * B\n\t" - "ldr r8, [%[a], #408]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #408]\n\t" + "str r3, [%[r]], #4\n\t" "# A[103] * B\n\t" - "ldr r8, [%[a], #412]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #412]\n\t" + "str r4, [%[r]], #4\n\t" "# A[104] * B\n\t" - "ldr r8, [%[a], #416]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #416]\n\t" + "str r5, [%[r]], #4\n\t" "# A[105] * B\n\t" - "ldr r8, [%[a], #420]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #420]\n\t" + "str r3, [%[r]], #4\n\t" "# A[106] * B\n\t" - "ldr r8, [%[a], #424]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #424]\n\t" + "str r4, [%[r]], #4\n\t" "# A[107] * B\n\t" - "ldr r8, [%[a], #428]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #428]\n\t" + "str r5, [%[r]], #4\n\t" "# A[108] * B\n\t" - "ldr r8, [%[a], #432]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #432]\n\t" + "str r3, [%[r]], #4\n\t" "# A[109] * B\n\t" - "ldr r8, [%[a], #436]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #436]\n\t" + "str r4, [%[r]], #4\n\t" "# A[110] * B\n\t" - "ldr r8, [%[a], #440]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #440]\n\t" + "str r5, [%[r]], #4\n\t" "# A[111] * B\n\t" - "ldr r8, [%[a], #444]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #444]\n\t" + "str r3, [%[r]], #4\n\t" "# A[112] * B\n\t" - "ldr r8, [%[a], #448]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #448]\n\t" + "str r4, [%[r]], #4\n\t" "# A[113] * B\n\t" - "ldr r8, [%[a], #452]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #452]\n\t" + "str r5, [%[r]], #4\n\t" "# A[114] * B\n\t" - "ldr r8, [%[a], #456]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #456]\n\t" + "str r3, [%[r]], #4\n\t" "# A[115] * B\n\t" - "ldr r8, [%[a], #460]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #460]\n\t" + "str r4, [%[r]], #4\n\t" "# A[116] * B\n\t" - "ldr r8, [%[a], #464]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #464]\n\t" + "str r5, [%[r]], #4\n\t" "# A[117] * B\n\t" - "ldr r8, [%[a], #468]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #468]\n\t" + "str r3, [%[r]], #4\n\t" "# A[118] * B\n\t" - "ldr r8, [%[a], #472]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #472]\n\t" + "str r4, [%[r]], #4\n\t" "# A[119] * B\n\t" - "ldr r8, [%[a], #476]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #476]\n\t" + "str r5, [%[r]], #4\n\t" "# A[120] * B\n\t" - "ldr r8, [%[a], #480]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #480]\n\t" + "str r3, [%[r]], #4\n\t" "# A[121] * B\n\t" - "ldr r8, [%[a], #484]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #484]\n\t" + "str r4, [%[r]], #4\n\t" "# A[122] * B\n\t" - "ldr r8, [%[a], #488]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #488]\n\t" + "str r5, [%[r]], #4\n\t" "# A[123] * B\n\t" - "ldr r8, [%[a], #492]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #492]\n\t" + "str r3, [%[r]], #4\n\t" "# A[124] * B\n\t" - "ldr r8, [%[a], #496]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #496]\n\t" + "str r4, [%[r]], #4\n\t" "# A[125] * B\n\t" - "ldr r8, [%[a], #500]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #500]\n\t" + "str r5, [%[r]], #4\n\t" "# A[126] * B\n\t" - "ldr r8, [%[a], #504]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #504]\n\t" + "str r3, [%[r]], #4\n\t" "# A[127] * B\n\t" - "ldr r8, [%[a], #508]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" - "str r4, [%[r], #508]\n\t" - "str r5, [%[r], #512]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -22205,457 +21319,457 @@ static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a, const sp_di __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #72]\n\t" - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #88]\n\t" - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #104]\n\t" - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #120]\n\t" - "ldrd r4, r5, [%[a], #128]\n\t" - "ldrd r6, r7, [%[b], #128]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #128]\n\t" - "ldrd r4, r5, [%[a], #136]\n\t" - "ldrd r6, r7, [%[b], #136]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #136]\n\t" - "ldrd r4, r5, [%[a], #144]\n\t" - "ldrd r6, r7, [%[b], #144]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #144]\n\t" - "ldrd r4, r5, [%[a], #152]\n\t" - "ldrd r6, r7, [%[b], #152]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #152]\n\t" - "ldrd r4, r5, [%[a], #160]\n\t" - "ldrd r6, r7, [%[b], #160]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #160]\n\t" - "ldrd r4, r5, [%[a], #168]\n\t" - "ldrd r6, r7, [%[b], #168]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #168]\n\t" - "ldrd r4, r5, [%[a], #176]\n\t" - "ldrd r6, r7, [%[b], #176]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #176]\n\t" - "ldrd r4, r5, [%[a], #184]\n\t" - "ldrd r6, r7, [%[b], #184]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #184]\n\t" - "ldrd r4, r5, [%[a], #192]\n\t" - "ldrd r6, r7, [%[b], #192]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #192]\n\t" - "ldrd r4, r5, [%[a], #200]\n\t" - "ldrd r6, r7, [%[b], #200]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #200]\n\t" - "ldrd r4, r5, [%[a], #208]\n\t" - "ldrd r6, r7, [%[b], #208]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #208]\n\t" - "ldrd r4, r5, [%[a], #216]\n\t" - "ldrd r6, r7, [%[b], #216]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #216]\n\t" - "ldrd r4, r5, [%[a], #224]\n\t" - "ldrd r6, r7, [%[b], #224]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #224]\n\t" - "ldrd r4, r5, [%[a], #232]\n\t" - "ldrd r6, r7, [%[b], #232]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #232]\n\t" - "ldrd r4, r5, [%[a], #240]\n\t" - "ldrd r6, r7, [%[b], #240]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #240]\n\t" - "ldrd r4, r5, [%[a], #248]\n\t" - "ldrd r6, r7, [%[b], #248]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #248]\n\t" - "ldrd r4, r5, [%[a], #256]\n\t" - "ldrd r6, r7, [%[b], #256]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #256]\n\t" - "ldrd r4, r5, [%[a], #264]\n\t" - "ldrd r6, r7, [%[b], #264]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #264]\n\t" - "ldrd r4, r5, [%[a], #272]\n\t" - "ldrd r6, r7, [%[b], #272]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #272]\n\t" - "ldrd r4, r5, [%[a], #280]\n\t" - "ldrd r6, r7, [%[b], #280]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #280]\n\t" - "ldrd r4, r5, [%[a], #288]\n\t" - "ldrd r6, r7, [%[b], #288]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #288]\n\t" - "ldrd r4, r5, [%[a], #296]\n\t" - "ldrd r6, r7, [%[b], #296]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #296]\n\t" - "ldrd r4, r5, [%[a], #304]\n\t" - "ldrd r6, r7, [%[b], #304]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #304]\n\t" - "ldrd r4, r5, [%[a], #312]\n\t" - "ldrd r6, r7, [%[b], #312]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #312]\n\t" - "ldrd r4, r5, [%[a], #320]\n\t" - "ldrd r6, r7, [%[b], #320]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #320]\n\t" - "ldrd r4, r5, [%[a], #328]\n\t" - "ldrd r6, r7, [%[b], #328]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #328]\n\t" - "ldrd r4, r5, [%[a], #336]\n\t" - "ldrd r6, r7, [%[b], #336]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #336]\n\t" - "ldrd r4, r5, [%[a], #344]\n\t" - "ldrd r6, r7, [%[b], #344]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #344]\n\t" - "ldrd r4, r5, [%[a], #352]\n\t" - "ldrd r6, r7, [%[b], #352]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #352]\n\t" - "ldrd r4, r5, [%[a], #360]\n\t" - "ldrd r6, r7, [%[b], #360]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #360]\n\t" - "ldrd r4, r5, [%[a], #368]\n\t" - "ldrd r6, r7, [%[b], #368]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #368]\n\t" - "ldrd r4, r5, [%[a], #376]\n\t" - "ldrd r6, r7, [%[b], #376]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #376]\n\t" - "ldrd r4, r5, [%[a], #384]\n\t" - "ldrd r6, r7, [%[b], #384]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #384]\n\t" - "ldrd r4, r5, [%[a], #392]\n\t" - "ldrd r6, r7, [%[b], #392]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #392]\n\t" - "ldrd r4, r5, [%[a], #400]\n\t" - "ldrd r6, r7, [%[b], #400]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #400]\n\t" - "ldrd r4, r5, [%[a], #408]\n\t" - "ldrd r6, r7, [%[b], #408]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #408]\n\t" - "ldrd r4, r5, [%[a], #416]\n\t" - "ldrd r6, r7, [%[b], #416]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #416]\n\t" - "ldrd r4, r5, [%[a], #424]\n\t" - "ldrd r6, r7, [%[b], #424]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #424]\n\t" - "ldrd r4, r5, [%[a], #432]\n\t" - "ldrd r6, r7, [%[b], #432]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #432]\n\t" - "ldrd r4, r5, [%[a], #440]\n\t" - "ldrd r6, r7, [%[b], #440]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #440]\n\t" - "ldrd r4, r5, [%[a], #448]\n\t" - "ldrd r6, r7, [%[b], #448]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #448]\n\t" - "ldrd r4, r5, [%[a], #456]\n\t" - "ldrd r6, r7, [%[b], #456]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #456]\n\t" - "ldrd r4, r5, [%[a], #464]\n\t" - "ldrd r6, r7, [%[b], #464]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #464]\n\t" - "ldrd r4, r5, [%[a], #472]\n\t" - "ldrd r6, r7, [%[b], #472]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #472]\n\t" - "ldrd r4, r5, [%[a], #480]\n\t" - "ldrd r6, r7, [%[b], #480]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #480]\n\t" - "ldrd r4, r5, [%[a], #488]\n\t" - "ldrd r6, r7, [%[b], #488]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #488]\n\t" - "ldrd r4, r5, [%[a], #496]\n\t" - "ldrd r6, r7, [%[b], #496]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #496]\n\t" - "ldrd r4, r5, [%[a], #504]\n\t" - "ldrd r6, r7, [%[b], #504]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #504]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -23895,22 +23009,19 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, "add r14, %[a], #512\n\t" "\n1:\n\t" "rsbs %[c], %[c], #0\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "sbc %[c], r3, r3\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r4, r4\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -23929,330 +23040,234 @@ static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" - "ldrd r3, r4, [%[a], #128]\n\t" - "ldrd r5, r6, [%[a], #136]\n\t" - "ldrd r7, r8, [%[b], #128]\n\t" - "ldrd r9, r10, [%[b], #136]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #128]\n\t" - "strd r5, r6, [%[r], #136]\n\t" - "ldrd r3, r4, [%[a], #144]\n\t" - "ldrd r5, r6, [%[a], #152]\n\t" - "ldrd r7, r8, [%[b], #144]\n\t" - "ldrd r9, r10, [%[b], #152]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #144]\n\t" - "strd r5, r6, [%[r], #152]\n\t" - "ldrd r3, r4, [%[a], #160]\n\t" - "ldrd r5, r6, [%[a], #168]\n\t" - "ldrd r7, r8, [%[b], #160]\n\t" - "ldrd r9, r10, [%[b], #168]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #160]\n\t" - "strd r5, r6, [%[r], #168]\n\t" - "ldrd r3, r4, [%[a], #176]\n\t" - "ldrd r5, r6, [%[a], #184]\n\t" - "ldrd r7, r8, [%[b], #176]\n\t" - "ldrd r9, r10, [%[b], #184]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #176]\n\t" - "strd r5, r6, [%[r], #184]\n\t" - "ldrd r3, r4, [%[a], #192]\n\t" - "ldrd r5, r6, [%[a], #200]\n\t" - "ldrd r7, r8, [%[b], #192]\n\t" - "ldrd r9, r10, [%[b], #200]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #192]\n\t" - "strd r5, r6, [%[r], #200]\n\t" - "ldrd r3, r4, [%[a], #208]\n\t" - "ldrd r5, r6, [%[a], #216]\n\t" - "ldrd r7, r8, [%[b], #208]\n\t" - "ldrd r9, r10, [%[b], #216]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #208]\n\t" - "strd r5, r6, [%[r], #216]\n\t" - "ldrd r3, r4, [%[a], #224]\n\t" - "ldrd r5, r6, [%[a], #232]\n\t" - "ldrd r7, r8, [%[b], #224]\n\t" - "ldrd r9, r10, [%[b], #232]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #224]\n\t" - "strd r5, r6, [%[r], #232]\n\t" - "ldrd r3, r4, [%[a], #240]\n\t" - "ldrd r5, r6, [%[a], #248]\n\t" - "ldrd r7, r8, [%[b], #240]\n\t" - "ldrd r9, r10, [%[b], #248]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #240]\n\t" - "strd r5, r6, [%[r], #248]\n\t" - "ldrd r3, r4, [%[a], #256]\n\t" - "ldrd r5, r6, [%[a], #264]\n\t" - "ldrd r7, r8, [%[b], #256]\n\t" - "ldrd r9, r10, [%[b], #264]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #256]\n\t" - "strd r5, r6, [%[r], #264]\n\t" - "ldrd r3, r4, [%[a], #272]\n\t" - "ldrd r5, r6, [%[a], #280]\n\t" - "ldrd r7, r8, [%[b], #272]\n\t" - "ldrd r9, r10, [%[b], #280]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #272]\n\t" - "strd r5, r6, [%[r], #280]\n\t" - "ldrd r3, r4, [%[a], #288]\n\t" - "ldrd r5, r6, [%[a], #296]\n\t" - "ldrd r7, r8, [%[b], #288]\n\t" - "ldrd r9, r10, [%[b], #296]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #288]\n\t" - "strd r5, r6, [%[r], #296]\n\t" - "ldrd r3, r4, [%[a], #304]\n\t" - "ldrd r5, r6, [%[a], #312]\n\t" - "ldrd r7, r8, [%[b], #304]\n\t" - "ldrd r9, r10, [%[b], #312]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #304]\n\t" - "strd r5, r6, [%[r], #312]\n\t" - "ldrd r3, r4, [%[a], #320]\n\t" - "ldrd r5, r6, [%[a], #328]\n\t" - "ldrd r7, r8, [%[b], #320]\n\t" - "ldrd r9, r10, [%[b], #328]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #320]\n\t" - "strd r5, r6, [%[r], #328]\n\t" - "ldrd r3, r4, [%[a], #336]\n\t" - "ldrd r5, r6, [%[a], #344]\n\t" - "ldrd r7, r8, [%[b], #336]\n\t" - "ldrd r9, r10, [%[b], #344]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #336]\n\t" - "strd r5, r6, [%[r], #344]\n\t" - "ldrd r3, r4, [%[a], #352]\n\t" - "ldrd r5, r6, [%[a], #360]\n\t" - "ldrd r7, r8, [%[b], #352]\n\t" - "ldrd r9, r10, [%[b], #360]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #352]\n\t" - "strd r5, r6, [%[r], #360]\n\t" - "ldrd r3, r4, [%[a], #368]\n\t" - "ldrd r5, r6, [%[a], #376]\n\t" - "ldrd r7, r8, [%[b], #368]\n\t" - "ldrd r9, r10, [%[b], #376]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #368]\n\t" - "strd r5, r6, [%[r], #376]\n\t" - "ldrd r3, r4, [%[a], #384]\n\t" - "ldrd r5, r6, [%[a], #392]\n\t" - "ldrd r7, r8, [%[b], #384]\n\t" - "ldrd r9, r10, [%[b], #392]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #384]\n\t" - "strd r5, r6, [%[r], #392]\n\t" - "ldrd r3, r4, [%[a], #400]\n\t" - "ldrd r5, r6, [%[a], #408]\n\t" - "ldrd r7, r8, [%[b], #400]\n\t" - "ldrd r9, r10, [%[b], #408]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #400]\n\t" - "strd r5, r6, [%[r], #408]\n\t" - "ldrd r3, r4, [%[a], #416]\n\t" - "ldrd r5, r6, [%[a], #424]\n\t" - "ldrd r7, r8, [%[b], #416]\n\t" - "ldrd r9, r10, [%[b], #424]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #416]\n\t" - "strd r5, r6, [%[r], #424]\n\t" - "ldrd r3, r4, [%[a], #432]\n\t" - "ldrd r5, r6, [%[a], #440]\n\t" - "ldrd r7, r8, [%[b], #432]\n\t" - "ldrd r9, r10, [%[b], #440]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #432]\n\t" - "strd r5, r6, [%[r], #440]\n\t" - "ldrd r3, r4, [%[a], #448]\n\t" - "ldrd r5, r6, [%[a], #456]\n\t" - "ldrd r7, r8, [%[b], #448]\n\t" - "ldrd r9, r10, [%[b], #456]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #448]\n\t" - "strd r5, r6, [%[r], #456]\n\t" - "ldrd r3, r4, [%[a], #464]\n\t" - "ldrd r5, r6, [%[a], #472]\n\t" - "ldrd r7, r8, [%[b], #464]\n\t" - "ldrd r9, r10, [%[b], #472]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #464]\n\t" - "strd r5, r6, [%[r], #472]\n\t" - "ldrd r3, r4, [%[a], #480]\n\t" - "ldrd r5, r6, [%[a], #488]\n\t" - "ldrd r7, r8, [%[b], #480]\n\t" - "ldrd r9, r10, [%[b], #488]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #480]\n\t" - "strd r5, r6, [%[r], #488]\n\t" - "ldrd r3, r4, [%[a], #496]\n\t" - "ldrd r5, r6, [%[a], #504]\n\t" - "ldrd r7, r8, [%[b], #496]\n\t" - "ldrd r9, r10, [%[b], #504]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #496]\n\t" - "strd r5, r6, [%[r], #504]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -27763,7 +26778,8 @@ static void sp_4096_lshift_128(sp_digit* r, const sp_digit* a, byte n) "lsl r2, r2, %[n]\n\t" "lsr r5, r5, r6\n\t" "orr r3, r3, r5\n\t" - "strd r2, r3, [%[r]]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n) : "memory", "r2", "r3", "r4", "r5", "r6" @@ -28994,23 +28010,20 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, "add r14, %[a], #32\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -29030,30 +28043,24 @@ static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -29076,22 +28083,19 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, "add r14, %[a], #32\n\t" "\n1:\n\t" "rsbs %[c], %[c], #0\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "sbc %[c], r3, r3\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r4, r4\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -29110,30 +28114,24 @@ static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -31019,37 +30017,37 @@ static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a, const sp_digit __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -31507,51 +30505,51 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( - "mov r9, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "and r14, r3, #1\n\t" - "sub r7, r9, r14\n\t" - "and r8, r7, #1\n\t" - "adds r3, r3, r7\n\t" - "adcs r4, r4, r7\n\t" - "adcs r5, r5, r7\n\t" - "adcs r6, r6, r9\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "adcs r3, r3, r9\n\t" - "adcs r4, r4, r9\n\t" + "mov r10, #0\n\t" + "ldrd r4, r5, [%[a], #0]\n\t" + "ldrd r6, r7, [%[a], #8]\n\t" + "and r14, r4, #1\n\t" + "sub r8, r10, r14\n\t" + "and r9, r8, #1\n\t" + "adds r4, r4, r8\n\t" "adcs r5, r5, r8\n\t" - "adcs r6, r6, r7\n\t" - "adc r14, r9, r9\n\t" - "lsr r7, r3, #1\n\t" + "adcs r6, r6, r8\n\t" + "adcs r7, r7, r10\n\t" + "strd r4, r5, [%[r], #0]\n\t" + "strd r6, r7, [%[r], #8]\n\t" + "ldrd r4, r5, [%[a], #16]\n\t" + "ldrd r6, r7, [%[a], #24]\n\t" + "adcs r4, r4, r10\n\t" + "adcs r5, r5, r10\n\t" + "adcs r6, r6, r9\n\t" + "adcs r7, r7, r8\n\t" + "adc r14, r10, r10\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" + "lsr r11, r7, #1\n\t" "orr r8, r8, r5, lsl #31\n\t" "orr r9, r9, r6, lsl #31\n\t" - "orr r10, r10, r14, lsl #31\n\t" - "mov r14, r3\n\t" - "strd r7, r8, [%[r], #16]\n\t" - "strd r9, r10, [%[r], #24]\n\t" - "ldrd r3, r4, [%[r], #0]\n\t" - "ldrd r5, r6, [%[r], #8]\n\t" - "lsr r7, r3, #1\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r14, lsl #31\n\t" + "mov r14, r4\n\t" + "strd r8, r9, [%[r], #16]\n\t" + "strd r10, r11, [%[r], #24]\n\t" + "ldrd r4, r5, [%[r], #0]\n\t" + "ldrd r6, r7, [%[r], #8]\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" + "lsr r11, r7, #1\n\t" "orr r8, r8, r5, lsl #31\n\t" "orr r9, r9, r6, lsl #31\n\t" - "orr r10, r10, r14, lsl #31\n\t" - "strd r7, r8, [%[r], #0]\n\t" - "strd r9, r10, [%[r], #8]\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r14, lsl #31\n\t" + "strd r8, r9, [%[r], #0]\n\t" + "strd r10, r11, [%[r], #8]\n\t" : : [r] "r" (r), [a] "r" (a), [m] "r" (m) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); } @@ -35041,25 +34039,21 @@ int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am, static void sp_256_add_one_8(sp_digit* a) { __asm__ __volatile__ ( - "ldrd r1, r2, [%[a], #0]\n\t" - "ldrd r3, r4, [%[a], #8]\n\t" - "adds r1, r1, #1\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "adds r2, r2, #1\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #0]\n\t" - "strd r3, r4, [%[a], #8]\n\t" - "ldrd r1, r2, [%[a], #16]\n\t" - "ldrd r3, r4, [%[a], #24]\n\t" - "adcs r1, r1, #0\n\t" - "adcs r2, r2, #0\n\t" - "adcs r3, r3, #0\n\t" - "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #16]\n\t" - "strd r3, r4, [%[a], #24]\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + : [a] "+r" (a) : - : [a] "r" (a) - : "memory", "r1", "r2", "r3", "r4" + : "memory", "r2", "r3", "r4", "r5" ); } @@ -35319,22 +34313,19 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) "add r12, %[a], #32\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -35351,30 +34342,24 @@ static sp_digit sp_256_sub_in_place_8(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -35422,61 +34407,61 @@ static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" - "str r4, [%[r], #28]\n\t" - "str r5, [%[r], #32]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -36182,36 +35167,36 @@ int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng, static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( - "mov r9, #0\n\t" + "mov r10, #0\n\t" "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "lsr r7, r3, #1\n\t" + "ldrd r4, r5, [%[a], #16]\n\t" + "ldrd r6, r7, [%[a], #24]\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" + "lsr r11, r7, #1\n\t" "orr r8, r8, r5, lsl #31\n\t" "orr r9, r9, r6, lsl #31\n\t" - "orr r10, r10, r14, lsl #31\n\t" - "mov r14, r3\n\t" - "strd r7, r8, [%[r], #16]\n\t" - "strd r9, r10, [%[r], #24]\n\t" - "ldrd r3, r4, [%[r], #0]\n\t" - "ldrd r5, r6, [%[r], #8]\n\t" - "lsr r7, r3, #1\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r14, lsl #31\n\t" + "mov r14, r4\n\t" + "strd r8, r9, [%[r], #16]\n\t" + "strd r10, r11, [%[r], #24]\n\t" + "ldrd r4, r5, [%[r], #0]\n\t" + "ldrd r6, r7, [%[r], #8]\n\t" "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" - "orr r7, r7, r4, lsl #31\n\t" + "lsr r11, r7, #1\n\t" "orr r8, r8, r5, lsl #31\n\t" "orr r9, r9, r6, lsl #31\n\t" - "orr r10, r10, r14, lsl #31\n\t" - "strd r7, r8, [%[r], #0]\n\t" - "strd r9, r10, [%[r], #8]\n\t" + "orr r10, r10, r7, lsl #31\n\t" + "orr r11, r11, r14, lsl #31\n\t" + "strd r8, r9, [%[r], #0]\n\t" + "strd r10, r11, [%[r], #8]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); } @@ -39290,23 +38275,20 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, "add r14, %[a], #48\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -39326,40 +38308,31 @@ static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -39382,22 +38355,19 @@ static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, "add r14, %[a], #48\n\t" "\n1:\n\t" "rsbs %[c], %[c], #0\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "sbc %[c], r3, r3\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r4, r4\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -39416,40 +38386,31 @@ static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -39786,51 +38747,51 @@ static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a, const sp_digi __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -40570,7 +39531,8 @@ static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) "str r2, [%[r], #36]\n\t" "orr r3, r3, r4, lsl #31\n\t" "lsr r4, r4, #1\n\t" - "strd r3, r4, [%[r], #40]\n\t" + "str r3, [%[r], #40]\n\t" + "str r4, [%[r], #44]\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4" @@ -44135,33 +43097,27 @@ int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am, static void sp_384_add_one_12(sp_digit* a) { __asm__ __volatile__ ( - "ldrd r1, r2, [%[a], #0]\n\t" - "ldrd r3, r4, [%[a], #8]\n\t" - "adds r1, r1, #1\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "adds r2, r2, #1\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #0]\n\t" - "strd r3, r4, [%[a], #8]\n\t" - "ldrd r1, r2, [%[a], #16]\n\t" - "ldrd r3, r4, [%[a], #24]\n\t" - "adcs r1, r1, #0\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #16]\n\t" - "strd r3, r4, [%[a], #24]\n\t" - "ldrd r1, r2, [%[a], #32]\n\t" - "ldrd r3, r4, [%[a], #40]\n\t" - "adcs r1, r1, #0\n\t" - "adcs r2, r2, #0\n\t" - "adcs r3, r3, #0\n\t" - "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #32]\n\t" - "strd r3, r4, [%[a], #40]\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + : [a] "+r" (a) : - : [a] "r" (a) - : "memory", "r1", "r2", "r3", "r4" + : "memory", "r2", "r3", "r4", "r5" ); } @@ -44421,22 +43377,19 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) "add r12, %[a], #48\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -44453,40 +43406,31 @@ static sp_digit sp_384_sub_in_place_12(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -44534,89 +43478,89 @@ static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adc r3, r3, r7\n\t" - "str r5, [%[r], #44]\n\t" - "str r3, [%[r], #48]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r5, [%[r]], #4\n\t" + "str r3, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -50105,30 +49049,27 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, "add r14, %[a], #64\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" "adds %[c], %[c], #-1\n\t" - "ldr r3, [%[a]], #4\n\t" - "ldr r7, [%[b]], #4\n\t" - "adcs r3, r3, r7\n\t" - "str r3, [%[r]], #4\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "ldm %[a]!, {r4}\n\t" + "ldm %[b]!, {r8}\n\t" + "adcs r4, r4, r8\n\t" + "stm %[r]!, {r4}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -50148,54 +49089,42 @@ static sp_digit sp_521_add_17(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4}\n\t" + "ldm %[b]!, {r8}\n\t" "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r7, [%[b], #64]\n\t" - "adcs r3, r3, r7\n\t" - "str r3, [%[r], #64]\n\t" + "stm %[r]!, {r4}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -50218,28 +49147,25 @@ static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, "add r14, %[a], #64\n\t" "\n1:\n\t" "rsbs %[c], %[c], #0\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "sbc %[c], r3, r3\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r4, r4\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" "rsbs %[c], %[c], #0\n\t" - "ldr r3, [%[a]], #4\n\t" - "ldr r7, [%[b]], #4\n\t" - "sbcs r3, r3, r7\n\t" - "str r3, [%[r]], #4\n\t" - "sbc %[c], r6, r6\n\t" + "ldm %[a]!, {r4}\n\t" + "ldm %[b]!, {r8}\n\t" + "sbcs r4, r4, r8\n\t" + "stm %[r]!, {r4}\n\t" + "sbc %[c], r7, r7\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -50258,54 +49184,42 @@ static sp_digit sp_521_sub_17(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4}\n\t" + "ldm %[b]!, {r8}\n\t" "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldr r3, [%[a], #64]\n\t" - "ldr r7, [%[b], #64]\n\t" - "sbcs r3, r3, r7\n\t" - "str r3, [%[r], #64]\n\t" + "stm %[r]!, {r4}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -50558,70 +49472,70 @@ static sp_digit sp_521_cond_sub_17(sp_digit* r, const sp_digit* a, const sp_digi __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[b], #64]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldr r4, [%[a]]\n\t" + "ldr r6, [%[b]]\n\t" "and r6, r6, %[m]\n\t" "sbcs r4, r4, r6\n\t" - "str r4, [%[r], #64]\n\t" + "str r4, [%[r]]\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -50647,40 +49561,40 @@ SP_NOINLINE static void sp_521_mont_reduce_17(sp_digit* a, const sp_digit* m, "add r14, %[a], #64\n\t" "ldm r14!, {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10}\n\t" "lsr r1, r1, #9\n\t" - "orr r1, r1, r2, lsl 23\n\t" + "orr r1, r1, r2, lsl #23\n\t" "lsr r2, r2, #9\n\t" - "orr r2, r2, r3, lsl 23\n\t" + "orr r2, r2, r3, lsl #23\n\t" "lsr r3, r3, #9\n\t" - "orr r3, r3, r4, lsl 23\n\t" + "orr r3, r3, r4, lsl #23\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r6, lsl 23\n\t" + "orr r5, r5, r6, lsl #23\n\t" "lsr r6, r6, #9\n\t" - "orr r6, r6, r7, lsl 23\n\t" + "orr r6, r6, r7, lsl #23\n\t" "lsr r7, r7, #9\n\t" - "orr r7, r7, r8, lsl 23\n\t" + "orr r7, r7, r8, lsl #23\n\t" "lsr r8, r8, #9\n\t" - "orr r8, r8, r9, lsl 23\n\t" + "orr r8, r8, r9, lsl #23\n\t" "lsr r9, r9, #9\n\t" - "orr r9, r9, r10, lsl 23\n\t" + "orr r9, r9, r10, lsl #23\n\t" "stm r12!, {r1, r2, r3, r4, r5, r6, r7, r8, r9}\n\t" "mov r1, r10\n\t" "ldm r14, {r2, r3, r4, r5, r6, r7, r8}\n\t" "lsr r1, r1, #9\n\t" - "orr r1, r1, r2, lsl 23\n\t" + "orr r1, r1, r2, lsl #23\n\t" "lsr r2, r2, #9\n\t" - "orr r2, r2, r3, lsl 23\n\t" + "orr r2, r2, r3, lsl #23\n\t" "lsr r3, r3, #9\n\t" - "orr r3, r3, r4, lsl 23\n\t" + "orr r3, r3, r4, lsl #23\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r6, lsl 23\n\t" + "orr r5, r5, r6, lsl #23\n\t" "lsr r6, r6, #9\n\t" - "orr r6, r6, r7, lsl 23\n\t" + "orr r6, r6, r7, lsl #23\n\t" "lsr r7, r7, #9\n\t" - "orr r7, r7, r8, lsl 23\n\t" + "orr r7, r7, r8, lsl #23\n\t" "lsr r8, r8, #9\n\t" "stm r12!, {r1, r2, r3, r4, r5, r6, r7, r8}\n\t" "mov r14, sp\n\t" @@ -50766,7 +49680,7 @@ SP_NOINLINE static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* "\n1:\n\t" "# mu = a[i] * mp\n\t" "mul r8, %[mp], r10\n\t" - "cmp r12, 64\n\t" + "cmp r12, #64\n\t" "bne L_521_mont_reduce_17_nomask\n\t" "mov r7, #0x1ff\n\t" "and r8, r8, r7\n\t" @@ -50935,67 +49849,67 @@ SP_NOINLINE static void sp_521_mont_reduce_order_17(sp_digit* a, const sp_digit* "ldr r4, [%[a], #0]\n\t" "ldr r5, [%[a], #4]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #4]\n\t" "ldr r4, [%[a], #8]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #8]\n\t" "ldr r5, [%[a], #12]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #12]\n\t" "ldr r4, [%[a], #16]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #16]\n\t" "ldr r5, [%[a], #20]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #20]\n\t" "ldr r4, [%[a], #24]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #24]\n\t" "ldr r5, [%[a], #28]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #28]\n\t" "ldr r4, [%[a], #32]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #32]\n\t" "ldr r5, [%[a], #36]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #36]\n\t" "ldr r4, [%[a], #40]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #40]\n\t" "ldr r5, [%[a], #44]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #44]\n\t" "ldr r4, [%[a], #48]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #48]\n\t" "ldr r5, [%[a], #52]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #52]\n\t" "ldr r4, [%[a], #56]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #56]\n\t" "ldr r5, [%[a], #60]\n\t" "lsr r4, r4, #9\n\t" - "orr r4, r4, r5, lsl 23\n\t" + "orr r4, r4, r5, lsl #23\n\t" "str r4, [%[a], #60]\n\t" "ldr r4, [%[a], #64]\n\t" "lsr r5, r5, #9\n\t" - "orr r5, r5, r4, lsl 23\n\t" + "orr r5, r5, r4, lsl #23\n\t" "str r5, [%[a], #64]\n\t" "lsr r4, r4, #9\n\t" "str r4, [%[a], #68]\n\t" @@ -51711,131 +50625,6 @@ static void sp_521_mont_tpl_17(sp_digit* r, const sp_digit* a, const sp_digit* m ); } -#ifdef WOLFSSL_SP_SMALL -/* Conditionally add a and b using the mask m. - * m is -1 to add and 0 when not. - * - * r A single precision number representing conditional add result. - * a A single precision number to add with. - * b A single precision number to add. - * m Mask value to apply. - */ -static sp_digit sp_521_cond_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - "mov r7, #0\n\t" - "mov r6, #0\n\t" - "1:\n\t" - "adds %[c], %[c], #-1\n\t" - "ldr r4, [%[a], r6]\n\t" - "ldr r5, [%[b], r6]\n\t" - "and r5, r5, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adc %[c], r7, r7\n\t" - "str r4, [%[r], r6]\n\t" - "add r6, r6, #4\n\t" - "cmp r6, #68\n\t" - "blt 1b\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7" - ); - - return c; -} -#endif /* WOLFSSL_SP_SMALL */ - -#ifndef WOLFSSL_SP_SMALL -/* Conditionally add a and b using the mask m. - * m is -1 to add and 0 when not. - * - * r A single precision number representing conditional add result. - * a A single precision number to add with. - * b A single precision number to add. - * m Mask value to apply. - */ -static sp_digit sp_521_cond_add_17(sp_digit* r, const sp_digit* a, const sp_digit* b, - sp_digit m) -{ - sp_digit c = 0; - - __asm__ __volatile__ ( - - "mov r8, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adds r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" - "and r6, r6, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "adcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldr r4, [%[a], #64]\n\t" - "ldr r6, [%[b], #64]\n\t" - "and r6, r6, %[m]\n\t" - "adcs r4, r4, r6\n\t" - "str r4, [%[r], #64]\n\t" - "adc %[c], r8, r8\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r5", "r6", "r7", "r8" - ); - - return c; -} -#endif /* !WOLFSSL_SP_SMALL */ - /* Subtract two Montgomery form numbers (r = a - b % m). * * r Result of subtration. @@ -51990,7 +50779,8 @@ static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) "str r4, [%[r], #56]\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" - "strd r2, r3, [%[r], #60]\n\t" + "str r2, [%[r], #60]\n\t" + "str r3, [%[r], #64]\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4" @@ -52005,11 +50795,12 @@ static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) */ static void sp_521_div2_17(sp_digit* r, const sp_digit* a, const sp_digit* m) { - sp_digit o; + sp_digit o = a[0] & 1; + + (void)m; - o = sp_521_cond_add_17(r, a, m, 0 - (a[0] & 1)); sp_521_rshift1_17(r, r); - r[16] |= o << 31; + r[16] |= o << 8; } /* Double the Montgomery form projective point p. @@ -56176,44 +54967,36 @@ int sp_ecc_mulmod_base_add_521(const mp_int* km, const ecc_point* am, static void sp_521_add_one_17(sp_digit* a) { __asm__ __volatile__ ( - "ldrd r1, r2, [%[a], #0]\n\t" - "ldrd r3, r4, [%[a], #8]\n\t" - "adds r1, r1, #1\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" + "adds r2, r2, #1\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #0]\n\t" - "strd r3, r4, [%[a], #8]\n\t" - "ldrd r1, r2, [%[a], #16]\n\t" - "ldrd r3, r4, [%[a], #24]\n\t" - "adcs r1, r1, #0\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #16]\n\t" - "strd r3, r4, [%[a], #24]\n\t" - "ldrd r1, r2, [%[a], #32]\n\t" - "ldrd r3, r4, [%[a], #40]\n\t" - "adcs r1, r1, #0\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2, r3, r4, r5}\n\t" "adcs r2, r2, #0\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #32]\n\t" - "strd r3, r4, [%[a], #40]\n\t" - "ldrd r1, r2, [%[a], #48]\n\t" - "ldrd r3, r4, [%[a], #56]\n\t" - "adcs r1, r1, #0\n\t" + "adcs r5, r5, #0\n\t" + "stm %[a]!, {r2, r3, r4, r5}\n\t" + "ldm %[a], {r2}\n\t" "adcs r2, r2, #0\n\t" - "adcs r3, r3, #0\n\t" - "adcs r4, r4, #0\n\t" - "strd r1, r2, [%[a], #48]\n\t" - "strd r3, r4, [%[a], #56]\n\t" - "ldr r1, [%[a], #64]\n\t" - "adcs r1, r1, #0\n\t" - "str r1, [%[a], #64]\n\t" + "stm %[a]!, {r2}\n\t" + : [a] "+r" (a) : - : [a] "r" (a) - : "memory", "r1", "r2", "r3", "r4" + : "memory", "r2", "r3", "r4", "r5" ); } @@ -56656,7 +55439,8 @@ static void sp_521_lshift_17(sp_digit* r, const sp_digit* a, byte n) "lsl r2, r2, %[n]\n\t" "lsr r5, r5, r6\n\t" "orr r3, r3, r5\n\t" - "strd r2, r3, [%[r]]\n\t" + "str r2, [%[r], #0]\n\t" + "str r3, [%[r], #4]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n) : "memory", "r2", "r3", "r4", "r5", "r6" @@ -56869,7 +55653,8 @@ static void sp_521_lshift_34(sp_digit* r, const sp_digit* a, byte n) "lsl r3, r3, %[n]\n\t" "lsr r5, r5, r6\n\t" "orr r4, r4, r5\n\t" - "strd r3, r4, [%[r]]\n\t" + "str r3, [%[r], #0]\n\t" + "str r4, [%[r], #4]\n\t" : : [r] "r" (r), [a] "r" (a), [n] "r" (n) : "memory", "r2", "r3", "r4", "r5", "r6" @@ -56891,28 +55676,25 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) "add r12, %[a], #64\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" "subs %[c], r14, %[c]\n\t" - "ldr r3, [%[a], 0]\n\t" - "ldr r7, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" - "str r3, [%[a]], #8\n\t" + "ldm %[a], {r4}\n\t" + "ldm %[b]!, {r8}\n\t" + "sbcs r4, r4, r8\n\t" + "stm %[a]!, {r4}\n\t" "sbc %[c], r14, r14\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -56929,54 +55711,42 @@ static sp_digit sp_521_sub_in_place_17(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4}\n\t" + "ldm %[b]!, {r8}\n\t" "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldr r2, [%[a], #64]\n\t" - "ldr r6, [%[b], #64]\n\t" - "sbcs r2, r2, r6\n\t" - "str r2, [%[a], #64]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "stm %[a]!, {r4}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -57024,124 +55794,124 @@ static void sp_521_mul_d_17(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" + "str r5, [%[r]], #4\n\t" "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" + "str r3, [%[r]], #4\n\t" "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" + "str r4, [%[r]], #4\n\t" "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" + "str r5, [%[r]], #4\n\t" "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" + "str r3, [%[r]], #4\n\t" "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" - "str r4, [%[r], #64]\n\t" - "str r5, [%[r], #68]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -62118,50 +60888,38 @@ static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -62177,90 +60935,66 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r2, r3, [%[a], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" - "ldrd r8, r9, [%[b], #8]\n\t" - "subs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #0]\n\t" - "strd r4, r5, [%[a], #8]\n\t" - "ldrd r2, r3, [%[a], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" - "ldrd r8, r9, [%[b], #24]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #16]\n\t" - "strd r4, r5, [%[a], #24]\n\t" - "ldrd r2, r3, [%[a], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" - "ldrd r8, r9, [%[b], #40]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #32]\n\t" - "strd r4, r5, [%[a], #40]\n\t" - "ldrd r2, r3, [%[a], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" - "ldrd r8, r9, [%[b], #56]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #48]\n\t" - "strd r4, r5, [%[a], #56]\n\t" - "ldrd r2, r3, [%[a], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" - "ldrd r8, r9, [%[b], #72]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #64]\n\t" - "strd r4, r5, [%[a], #72]\n\t" - "ldrd r2, r3, [%[a], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" - "ldrd r8, r9, [%[b], #88]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #80]\n\t" - "strd r4, r5, [%[a], #88]\n\t" - "ldrd r2, r3, [%[a], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" - "ldrd r8, r9, [%[b], #104]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #96]\n\t" - "strd r4, r5, [%[a], #104]\n\t" - "ldrd r2, r3, [%[a], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" - "ldrd r8, r9, [%[b], #120]\n\t" - "sbcs r2, r2, r6\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "strd r2, r3, [%[a], #112]\n\t" - "strd r4, r5, [%[a], #120]\n\t" - "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [a] "r" (a), [b] "r" (b) - : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r11, r11\n\t" + : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -62279,90 +61013,66 @@ static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r14, #0\n\t" - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "adds r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "adds r4, r4, r8\n\t" + "adcs r5, r5, r9\n\t" + "adcs r6, r6, r10\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "adcs r3, r3, r7\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "adcs r3, r3, r7\n\t" - "adcs r4, r4, r8\n\t" - "adcs r5, r5, r9\n\t" - "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "adc %[c], r14, r14\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -62449,50 +61159,38 @@ static sp_digit sp_1024_sub_16(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c; @@ -62767,22 +61465,19 @@ static sp_digit sp_1024_sub_in_place_32(sp_digit* a, const sp_digit* b) "add r12, %[a], #128\n\t" "\n1:\n\t" "subs %[c], r14, %[c]\n\t" - "ldrd r3, r4, [%[a]]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a], {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[a]], #8\n\t" - "strd r5, r6, [%[a]], #8\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[a]!, {r4, r5, r6, r7}\n\t" "sbc %[c], r14, r14\n\t" "cmp %[a], r12\n\t" "bne 1b\n\t" : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r12", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14" ); return c; @@ -62825,121 +61520,121 @@ static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r9, #0\n\t" - "ldrd r4, r5, [%[a], #0]\n\t" - "ldrd r6, r7, [%[b], #0]\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "subs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #0]\n\t" - "ldrd r4, r5, [%[a], #8]\n\t" - "ldrd r6, r7, [%[b], #8]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #8]\n\t" - "ldrd r4, r5, [%[a], #16]\n\t" - "ldrd r6, r7, [%[b], #16]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #16]\n\t" - "ldrd r4, r5, [%[a], #24]\n\t" - "ldrd r6, r7, [%[b], #24]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #24]\n\t" - "ldrd r4, r5, [%[a], #32]\n\t" - "ldrd r6, r7, [%[b], #32]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #32]\n\t" - "ldrd r4, r5, [%[a], #40]\n\t" - "ldrd r6, r7, [%[b], #40]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #40]\n\t" - "ldrd r4, r5, [%[a], #48]\n\t" - "ldrd r6, r7, [%[b], #48]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #48]\n\t" - "ldrd r4, r5, [%[a], #56]\n\t" - "ldrd r6, r7, [%[b], #56]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #56]\n\t" - "ldrd r4, r5, [%[a], #64]\n\t" - "ldrd r6, r7, [%[b], #64]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #64]\n\t" - "ldrd r4, r5, [%[a], #72]\n\t" - "ldrd r6, r7, [%[b], #72]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #72]\n\t" - "ldrd r4, r5, [%[a], #80]\n\t" - "ldrd r6, r7, [%[b], #80]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #80]\n\t" - "ldrd r4, r5, [%[a], #88]\n\t" - "ldrd r6, r7, [%[b], #88]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #88]\n\t" - "ldrd r4, r5, [%[a], #96]\n\t" - "ldrd r6, r7, [%[b], #96]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #96]\n\t" - "ldrd r4, r5, [%[a], #104]\n\t" - "ldrd r6, r7, [%[b], #104]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #104]\n\t" - "ldrd r4, r5, [%[a], #112]\n\t" - "ldrd r6, r7, [%[b], #112]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #112]\n\t" - "ldrd r4, r5, [%[a], #120]\n\t" - "ldrd r6, r7, [%[b], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" + "ldm %[a]!, {r4, r5}\n\t" + "ldm %[b]!, {r6, r7}\n\t" "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "sbcs r4, r4, r6\n\t" "sbcs r5, r5, r7\n\t" - "strd r4, r5, [%[r], #120]\n\t" + "stm %[r]!, {r4, r5}\n\t" "sbc %[c], r9, r9\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9" ); #endif /* WOLFSSL_SP_SMALL */ @@ -62963,23 +61658,20 @@ static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a, "add r14, %[a], #128\n\t" "\n1:\n\t" "adds %[c], %[c], #-1\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "adcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "adcs r4, r4, r8\n\t" "adcs r5, r5, r9\n\t" "adcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "mov r3, #0\n\t" - "adc %[c], r3, #0\n\t" + "adcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "mov r4, #0\n\t" + "adc %[c], r4, #0\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -63027,229 +61719,229 @@ static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a, __asm__ __volatile__ ( "mov r10, #0\n\t" "# A[0] * B\n\t" - "ldr r8, [%[a]]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r3, r4, %[b], r8\n\t" "mov r5, #0\n\t" - "str r3, [%[r]]\n\t" + "str r3, [%[r]], #4\n\t" "# A[1] * B\n\t" - "ldr r8, [%[a], #4]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #4]\n\t" + "str r4, [%[r]], #4\n\t" "# A[2] * B\n\t" - "ldr r8, [%[a], #8]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #8]\n\t" + "str r5, [%[r]], #4\n\t" "# A[3] * B\n\t" - "ldr r8, [%[a], #12]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #12]\n\t" + "str r3, [%[r]], #4\n\t" "# A[4] * B\n\t" - "ldr r8, [%[a], #16]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #16]\n\t" + "str r4, [%[r]], #4\n\t" "# A[5] * B\n\t" - "ldr r8, [%[a], #20]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #20]\n\t" + "str r5, [%[r]], #4\n\t" "# A[6] * B\n\t" - "ldr r8, [%[a], #24]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #24]\n\t" + "str r3, [%[r]], #4\n\t" "# A[7] * B\n\t" - "ldr r8, [%[a], #28]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #28]\n\t" + "str r4, [%[r]], #4\n\t" "# A[8] * B\n\t" - "ldr r8, [%[a], #32]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #32]\n\t" + "str r5, [%[r]], #4\n\t" "# A[9] * B\n\t" - "ldr r8, [%[a], #36]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #36]\n\t" + "str r3, [%[r]], #4\n\t" "# A[10] * B\n\t" - "ldr r8, [%[a], #40]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #40]\n\t" + "str r4, [%[r]], #4\n\t" "# A[11] * B\n\t" - "ldr r8, [%[a], #44]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #44]\n\t" + "str r5, [%[r]], #4\n\t" "# A[12] * B\n\t" - "ldr r8, [%[a], #48]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #48]\n\t" + "str r3, [%[r]], #4\n\t" "# A[13] * B\n\t" - "ldr r8, [%[a], #52]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #52]\n\t" + "str r4, [%[r]], #4\n\t" "# A[14] * B\n\t" - "ldr r8, [%[a], #56]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #56]\n\t" + "str r5, [%[r]], #4\n\t" "# A[15] * B\n\t" - "ldr r8, [%[a], #60]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #60]\n\t" + "str r3, [%[r]], #4\n\t" "# A[16] * B\n\t" - "ldr r8, [%[a], #64]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #64]\n\t" + "str r4, [%[r]], #4\n\t" "# A[17] * B\n\t" - "ldr r8, [%[a], #68]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #68]\n\t" + "str r5, [%[r]], #4\n\t" "# A[18] * B\n\t" - "ldr r8, [%[a], #72]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #72]\n\t" + "str r3, [%[r]], #4\n\t" "# A[19] * B\n\t" - "ldr r8, [%[a], #76]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #76]\n\t" + "str r4, [%[r]], #4\n\t" "# A[20] * B\n\t" - "ldr r8, [%[a], #80]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #80]\n\t" + "str r5, [%[r]], #4\n\t" "# A[21] * B\n\t" - "ldr r8, [%[a], #84]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #84]\n\t" + "str r3, [%[r]], #4\n\t" "# A[22] * B\n\t" - "ldr r8, [%[a], #88]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #88]\n\t" + "str r4, [%[r]], #4\n\t" "# A[23] * B\n\t" - "ldr r8, [%[a], #92]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #92]\n\t" + "str r5, [%[r]], #4\n\t" "# A[24] * B\n\t" - "ldr r8, [%[a], #96]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #96]\n\t" + "str r3, [%[r]], #4\n\t" "# A[25] * B\n\t" - "ldr r8, [%[a], #100]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #100]\n\t" + "str r4, [%[r]], #4\n\t" "# A[26] * B\n\t" - "ldr r8, [%[a], #104]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #104]\n\t" + "str r5, [%[r]], #4\n\t" "# A[27] * B\n\t" - "ldr r8, [%[a], #108]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #108]\n\t" + "str r3, [%[r]], #4\n\t" "# A[28] * B\n\t" - "ldr r8, [%[a], #112]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" "adc r3, r10, r10\n\t" - "str r4, [%[r], #112]\n\t" + "str r4, [%[r]], #4\n\t" "# A[29] * B\n\t" - "ldr r8, [%[a], #116]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r5, r5, r6\n\t" "adcs r3, r3, r7\n\t" "adc r4, r10, r10\n\t" - "str r5, [%[r], #116]\n\t" + "str r5, [%[r]], #4\n\t" "# A[30] * B\n\t" - "ldr r8, [%[a], #120]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r3, r3, r6\n\t" "adcs r4, r4, r7\n\t" "adc r5, r10, r10\n\t" - "str r3, [%[r], #120]\n\t" + "str r3, [%[r]], #4\n\t" "# A[31] * B\n\t" - "ldr r8, [%[a], #124]\n\t" + "ldr r8, [%[a]], #4\n\t" "umull r6, r7, %[b], r8\n\t" "adds r4, r4, r6\n\t" "adc r5, r5, r7\n\t" - "str r4, [%[r], #124]\n\t" - "str r5, [%[r], #128]\n\t" - : - : [r] "r" (r), [a] "r" (a), [b] "r" (b) + "str r4, [%[r]], #4\n\t" + "str r5, [%[r]]\n\t" + : [r] "+r" (r), [a] "+r" (a) + : [b] "r" (b) : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" ); #endif @@ -65628,7 +64320,8 @@ static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) "str r4, [%[r], #116]\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" - "strd r2, r3, [%[r], #120]\n\t" + "str r2, [%[r], #120]\n\t" + "str r3, [%[r], #124]\n\t" : : [r] "r" (r), [a] "r" (a) : "memory", "r2", "r3", "r4" @@ -65862,22 +64555,19 @@ static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, "add r14, %[a], #128\n\t" "\n1:\n\t" "rsbs %[c], %[c], #0\n\t" - "ldrd r3, r4, [%[a]], #8\n\t" - "ldrd r5, r6, [%[a]], #8\n\t" - "ldrd r7, r8, [%[b]], #8\n\t" - "ldrd r9, r10, [%[b]], #8\n\t" - "sbcs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r]], #8\n\t" - "strd r5, r6, [%[r]], #8\n\t" - "sbc %[c], r3, r3\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "sbc %[c], r4, r4\n\t" "cmp %[a], r14\n\t" "bne 1b\n\t" : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) : - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14" + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" ); return c; @@ -65896,90 +64586,66 @@ static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a, sp_digit c = 0; __asm__ __volatile__ ( - "ldrd r3, r4, [%[a], #0]\n\t" - "ldrd r5, r6, [%[a], #8]\n\t" - "ldrd r7, r8, [%[b], #0]\n\t" - "ldrd r9, r10, [%[b], #8]\n\t" - "subs r3, r3, r7\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" + "subs r4, r4, r8\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, r10\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #0]\n\t" - "strd r5, r6, [%[r], #8]\n\t" - "ldrd r3, r4, [%[a], #16]\n\t" - "ldrd r5, r6, [%[a], #24]\n\t" - "ldrd r7, r8, [%[b], #16]\n\t" - "ldrd r9, r10, [%[b], #24]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #16]\n\t" - "strd r5, r6, [%[r], #24]\n\t" - "ldrd r3, r4, [%[a], #32]\n\t" - "ldrd r5, r6, [%[a], #40]\n\t" - "ldrd r7, r8, [%[b], #32]\n\t" - "ldrd r9, r10, [%[b], #40]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #32]\n\t" - "strd r5, r6, [%[r], #40]\n\t" - "ldrd r3, r4, [%[a], #48]\n\t" - "ldrd r5, r6, [%[a], #56]\n\t" - "ldrd r7, r8, [%[b], #48]\n\t" - "ldrd r9, r10, [%[b], #56]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #48]\n\t" - "strd r5, r6, [%[r], #56]\n\t" - "ldrd r3, r4, [%[a], #64]\n\t" - "ldrd r5, r6, [%[a], #72]\n\t" - "ldrd r7, r8, [%[b], #64]\n\t" - "ldrd r9, r10, [%[b], #72]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #64]\n\t" - "strd r5, r6, [%[r], #72]\n\t" - "ldrd r3, r4, [%[a], #80]\n\t" - "ldrd r5, r6, [%[a], #88]\n\t" - "ldrd r7, r8, [%[b], #80]\n\t" - "ldrd r9, r10, [%[b], #88]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #80]\n\t" - "strd r5, r6, [%[r], #88]\n\t" - "ldrd r3, r4, [%[a], #96]\n\t" - "ldrd r5, r6, [%[a], #104]\n\t" - "ldrd r7, r8, [%[b], #96]\n\t" - "ldrd r9, r10, [%[b], #104]\n\t" - "sbcs r3, r3, r7\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" + "ldm %[a]!, {r4, r5, r6, r7}\n\t" + "ldm %[b]!, {r8, r9, r10, r11}\n\t" "sbcs r4, r4, r8\n\t" "sbcs r5, r5, r9\n\t" "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #96]\n\t" - "strd r5, r6, [%[r], #104]\n\t" - "ldrd r3, r4, [%[a], #112]\n\t" - "ldrd r5, r6, [%[a], #120]\n\t" - "ldrd r7, r8, [%[b], #112]\n\t" - "ldrd r9, r10, [%[b], #120]\n\t" - "sbcs r3, r3, r7\n\t" - "sbcs r4, r4, r8\n\t" - "sbcs r5, r5, r9\n\t" - "sbcs r6, r6, r10\n\t" - "strd r3, r4, [%[r], #112]\n\t" - "strd r5, r6, [%[r], #120]\n\t" + "sbcs r7, r7, r11\n\t" + "stm %[r]!, {r4, r5, r6, r7}\n\t" "sbc %[c], %[c], #0\n\t" - : [c] "+r" (c) - : [r] "r" (r), [a] "r" (a), [b] "r" (b) - : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10" + : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b) + : + : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" ); return c;