diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 1bb0ba4bf..916f06d26 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -2223,10 +2223,21 @@ static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -2444,10 +2455,21 @@ static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -7201,118 +7223,310 @@ static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r8, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" +#else "ldrd r4, r5, [%[a], #0]\n\t" "ldrd r6, r7, [%[b], #0]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r], #0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" "ldrd r6, r7, [%[b], #8]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[b], #16]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" +#else "strd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" +#else "ldrd r4, r5, [%[a], #24]\n\t" "ldrd r6, r7, [%[b], #24]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" +#else "strd r4, r5, [%[r], #24]\n\t" +#endif +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" +#else "ldrd r4, r5, [%[a], #32]\n\t" "ldrd r6, r7, [%[b], #32]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" +#else "strd r4, r5, [%[r], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" +#else "ldrd r4, r5, [%[a], #40]\n\t" "ldrd r6, r7, [%[b], #40]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" +#else "strd r4, r5, [%[r], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" +#else "ldrd r4, r5, [%[a], #48]\n\t" "ldrd r6, r7, [%[b], #48]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" +#else "strd r4, r5, [%[r], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" +#else "ldrd r4, r5, [%[a], #56]\n\t" "ldrd r6, r7, [%[b], #56]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #56]\n\t" + "str r5, [%[r], #60]\n\t" +#else "strd r4, r5, [%[r], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" +#else "ldrd r4, r5, [%[a], #64]\n\t" "ldrd r6, r7, [%[b], #64]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" +#else "strd r4, r5, [%[r], #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" +#else "ldrd r4, r5, [%[a], #72]\n\t" "ldrd r6, r7, [%[b], #72]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #72]\n\t" + "str r5, [%[r], #76]\n\t" +#else "strd r4, r5, [%[r], #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" +#else "ldrd r4, r5, [%[a], #80]\n\t" "ldrd r6, r7, [%[b], #80]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" +#else "strd r4, r5, [%[r], #80]\n\t" +#endif +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" +#else "ldrd r4, r5, [%[a], #88]\n\t" "ldrd r6, r7, [%[b], #88]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #88]\n\t" + "str r5, [%[r], #92]\n\t" +#else "strd r4, r5, [%[r], #88]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" +#else "ldrd r4, r5, [%[a], #96]\n\t" "ldrd r6, r7, [%[b], #96]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" +#else "strd r4, r5, [%[r], #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" +#else "ldrd r4, r5, [%[a], #104]\n\t" "ldrd r6, r7, [%[b], #104]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #104]\n\t" + "str r5, [%[r], #108]\n\t" +#else "strd r4, r5, [%[r], #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" +#else "ldrd r4, r5, [%[a], #112]\n\t" "ldrd r6, r7, [%[b], #112]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" +#else "strd r4, r5, [%[r], #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" +#else "ldrd r4, r5, [%[a], #120]\n\t" "ldrd r6, r7, [%[b], #120]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #120]\n\t" + "str r5, [%[r], #124]\n\t" +#else "strd r4, r5, [%[r], #120]\n\t" +#endif "adc %[c], r8, r8\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -11585,10 +11799,21 @@ static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -11806,10 +12031,21 @@ static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" 
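The pattern repeated throughout these hunks is the same: where the ARMv7 build moves two limbs at a time with ldrd/strd, builds with WOLFSSL_SP_ARM_ARCH < 7 fall back to pairs of ldr/str (the doubleword forms have stricter alignment requirements on pre-ARMv7 cores than the word-aligned sp_digit arrays guarantee). The arithmetic is unchanged. As a reference point, here is a minimal portable C sketch of what a conditional add such as sp_2048_cond_add_32 computes; the typedef, helper name, and limb-count parameter are illustrative, not wolfSSL API:

#include <stdint.h>

typedef uint32_t sp_digit;            /* assumption: 32-bit limbs, as in sp_arm32.c */

/* r = a + (b & m) over n limbs, least significant limb first.
 * m is all-ones to perform the add and zero to skip it; the final carry
 * is returned, matching the "adc %[c], r8, r8" at the end of the
 * assembly blocks. */
static sp_digit cond_add_sketch(sp_digit* r, const sp_digit* a,
                                const sp_digit* b, sp_digit m, int n)
{
    uint64_t c = 0;
    int i;

    for (i = 0; i < n; i++) {
        c += (uint64_t)a[i] + (b[i] & m);
        r[i] = (sp_digit)c;
        c >>= 32;
    }
    return (sp_digit)c;
}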
+ "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -18083,174 +18319,462 @@ static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r8, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" +#else "ldrd r4, r5, [%[a], #0]\n\t" "ldrd r6, r7, [%[b], #0]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r], #0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" "ldrd r6, r7, [%[b], #8]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[b], #16]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" +#else "strd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" +#else "ldrd r4, r5, [%[a], #24]\n\t" "ldrd r6, r7, [%[b], #24]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" +#else "strd r4, r5, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" +#else "ldrd r4, r5, [%[a], #32]\n\t" "ldrd r6, r7, [%[b], #32]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" +#else "strd r4, r5, [%[r], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" +#else "ldrd r4, r5, [%[a], #40]\n\t" "ldrd r6, r7, [%[b], #40]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" +#else "strd r4, r5, [%[r], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + 
"ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" +#else "ldrd r4, r5, [%[a], #48]\n\t" "ldrd r6, r7, [%[b], #48]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" +#else "strd r4, r5, [%[r], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" +#else "ldrd r4, r5, [%[a], #56]\n\t" "ldrd r6, r7, [%[b], #56]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #56]\n\t" + "str r5, [%[r], #60]\n\t" +#else "strd r4, r5, [%[r], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" +#else "ldrd r4, r5, [%[a], #64]\n\t" "ldrd r6, r7, [%[b], #64]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" +#else "strd r4, r5, [%[r], #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" +#else "ldrd r4, r5, [%[a], #72]\n\t" "ldrd r6, r7, [%[b], #72]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #72]\n\t" + "str r5, [%[r], #76]\n\t" +#else "strd r4, r5, [%[r], #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" +#else "ldrd r4, r5, [%[a], #80]\n\t" "ldrd r6, r7, [%[b], #80]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" +#else "strd r4, r5, [%[r], #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" +#else "ldrd r4, r5, [%[a], #88]\n\t" "ldrd r6, r7, [%[b], #88]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #88]\n\t" + "str r5, [%[r], #92]\n\t" +#else "strd r4, r5, [%[r], #88]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" +#else "ldrd r4, r5, [%[a], #96]\n\t" "ldrd r6, r7, [%[b], #96]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" +#else "strd r4, r5, [%[r], #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + 
"ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" +#else "ldrd r4, r5, [%[a], #104]\n\t" "ldrd r6, r7, [%[b], #104]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #104]\n\t" + "str r5, [%[r], #108]\n\t" +#else "strd r4, r5, [%[r], #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" +#else "ldrd r4, r5, [%[a], #112]\n\t" "ldrd r6, r7, [%[b], #112]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" +#else "strd r4, r5, [%[r], #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" +#else "ldrd r4, r5, [%[a], #120]\n\t" "ldrd r6, r7, [%[b], #120]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #120]\n\t" + "str r5, [%[r], #124]\n\t" +#else "strd r4, r5, [%[r], #120]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" +#else "ldrd r4, r5, [%[a], #128]\n\t" "ldrd r6, r7, [%[b], #128]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" +#else "strd r4, r5, [%[r], #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[a], #140]\n\t" + "ldr r6, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" +#else "ldrd r4, r5, [%[a], #136]\n\t" "ldrd r6, r7, [%[b], #136]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #136]\n\t" + "str r5, [%[r], #140]\n\t" +#else "strd r4, r5, [%[r], #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" +#else "ldrd r4, r5, [%[a], #144]\n\t" "ldrd r6, r7, [%[b], #144]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" +#else "strd r4, r5, [%[r], #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[a], #156]\n\t" + "ldr r6, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" +#else "ldrd r4, r5, [%[a], #152]\n\t" "ldrd r6, r7, [%[b], #152]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #152]\n\t" + "str r5, [%[r], #156]\n\t" +#else "strd r4, r5, [%[r], #152]\n\t" +#endif +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" +#else "ldrd r4, r5, [%[a], #160]\n\t" "ldrd r6, r7, [%[b], #160]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" +#else "strd r4, r5, [%[r], #160]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[a], #172]\n\t" + "ldr r6, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" +#else "ldrd r4, r5, [%[a], #168]\n\t" "ldrd r6, r7, [%[b], #168]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #168]\n\t" + "str r5, [%[r], #172]\n\t" +#else "strd r4, r5, [%[r], #168]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" +#else "ldrd r4, r5, [%[a], #176]\n\t" "ldrd r6, r7, [%[b], #176]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" +#else "strd r4, r5, [%[r], #176]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[a], #188]\n\t" + "ldr r6, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" +#else "ldrd r4, r5, [%[a], #184]\n\t" "ldrd r6, r7, [%[b], #184]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #184]\n\t" + "str r5, [%[r], #188]\n\t" +#else "strd r4, r5, [%[r], #184]\n\t" +#endif "adc %[c], r8, r8\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -20321,10 +20845,21 @@ static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -25501,230 +26036,614 @@ static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r8, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" +#else "ldrd r4, r5, [%[a], #0]\n\t" "ldrd r6, r7, [%[b], #0]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r], #0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + 
"ldr r7, [%[b], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" "ldrd r6, r7, [%[b], #8]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[b], #16]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" +#else "strd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" +#else "ldrd r4, r5, [%[a], #24]\n\t" "ldrd r6, r7, [%[b], #24]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" +#else "strd r4, r5, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" +#else "ldrd r4, r5, [%[a], #32]\n\t" "ldrd r6, r7, [%[b], #32]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" +#else "strd r4, r5, [%[r], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" +#else "ldrd r4, r5, [%[a], #40]\n\t" "ldrd r6, r7, [%[b], #40]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" +#else "strd r4, r5, [%[r], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" +#else "ldrd r4, r5, [%[a], #48]\n\t" "ldrd r6, r7, [%[b], #48]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" +#else "strd r4, r5, [%[r], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" +#else "ldrd r4, r5, [%[a], #56]\n\t" "ldrd r6, r7, [%[b], #56]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #56]\n\t" + "str r5, [%[r], #60]\n\t" +#else "strd r4, r5, [%[r], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, 
[%[b], #68]\n\t" +#else "ldrd r4, r5, [%[a], #64]\n\t" "ldrd r6, r7, [%[b], #64]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" +#else "strd r4, r5, [%[r], #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" +#else "ldrd r4, r5, [%[a], #72]\n\t" "ldrd r6, r7, [%[b], #72]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #72]\n\t" + "str r5, [%[r], #76]\n\t" +#else "strd r4, r5, [%[r], #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" +#else "ldrd r4, r5, [%[a], #80]\n\t" "ldrd r6, r7, [%[b], #80]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" +#else "strd r4, r5, [%[r], #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" +#else "ldrd r4, r5, [%[a], #88]\n\t" "ldrd r6, r7, [%[b], #88]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #88]\n\t" + "str r5, [%[r], #92]\n\t" +#else "strd r4, r5, [%[r], #88]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" +#else "ldrd r4, r5, [%[a], #96]\n\t" "ldrd r6, r7, [%[b], #96]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" +#else "strd r4, r5, [%[r], #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" +#else "ldrd r4, r5, [%[a], #104]\n\t" "ldrd r6, r7, [%[b], #104]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #104]\n\t" + "str r5, [%[r], #108]\n\t" +#else "strd r4, r5, [%[r], #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" +#else "ldrd r4, r5, [%[a], #112]\n\t" "ldrd r6, r7, [%[b], #112]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" +#else "strd r4, r5, [%[r], #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], 
#120]\n\t" + "ldr r7, [%[b], #124]\n\t" +#else "ldrd r4, r5, [%[a], #120]\n\t" "ldrd r6, r7, [%[b], #120]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #120]\n\t" + "str r5, [%[r], #124]\n\t" +#else "strd r4, r5, [%[r], #120]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #128]\n\t" + "ldr r5, [%[a], #132]\n\t" + "ldr r6, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" +#else "ldrd r4, r5, [%[a], #128]\n\t" "ldrd r6, r7, [%[b], #128]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #128]\n\t" + "str r5, [%[r], #132]\n\t" +#else "strd r4, r5, [%[r], #128]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #136]\n\t" + "ldr r5, [%[a], #140]\n\t" + "ldr r6, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" +#else "ldrd r4, r5, [%[a], #136]\n\t" "ldrd r6, r7, [%[b], #136]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #136]\n\t" + "str r5, [%[r], #140]\n\t" +#else "strd r4, r5, [%[r], #136]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #144]\n\t" + "ldr r5, [%[a], #148]\n\t" + "ldr r6, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" +#else "ldrd r4, r5, [%[a], #144]\n\t" "ldrd r6, r7, [%[b], #144]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #144]\n\t" + "str r5, [%[r], #148]\n\t" +#else "strd r4, r5, [%[r], #144]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #152]\n\t" + "ldr r5, [%[a], #156]\n\t" + "ldr r6, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" +#else "ldrd r4, r5, [%[a], #152]\n\t" "ldrd r6, r7, [%[b], #152]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #152]\n\t" + "str r5, [%[r], #156]\n\t" +#else "strd r4, r5, [%[r], #152]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #160]\n\t" + "ldr r5, [%[a], #164]\n\t" + "ldr r6, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" +#else "ldrd r4, r5, [%[a], #160]\n\t" "ldrd r6, r7, [%[b], #160]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #160]\n\t" + "str r5, [%[r], #164]\n\t" +#else "strd r4, r5, [%[r], #160]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #168]\n\t" + "ldr r5, [%[a], #172]\n\t" + "ldr r6, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" +#else "ldrd r4, r5, [%[a], #168]\n\t" "ldrd r6, r7, [%[b], #168]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #168]\n\t" + "str r5, [%[r], #172]\n\t" +#else "strd r4, r5, [%[r], #168]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], 
#176]\n\t" + "ldr r5, [%[a], #180]\n\t" + "ldr r6, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" +#else "ldrd r4, r5, [%[a], #176]\n\t" "ldrd r6, r7, [%[b], #176]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #176]\n\t" + "str r5, [%[r], #180]\n\t" +#else "strd r4, r5, [%[r], #176]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #184]\n\t" + "ldr r5, [%[a], #188]\n\t" + "ldr r6, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" +#else "ldrd r4, r5, [%[a], #184]\n\t" "ldrd r6, r7, [%[b], #184]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #184]\n\t" + "str r5, [%[r], #188]\n\t" +#else "strd r4, r5, [%[r], #184]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #192]\n\t" + "ldr r5, [%[a], #196]\n\t" + "ldr r6, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" +#else "ldrd r4, r5, [%[a], #192]\n\t" "ldrd r6, r7, [%[b], #192]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #192]\n\t" + "str r5, [%[r], #196]\n\t" +#else "strd r4, r5, [%[r], #192]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #200]\n\t" + "ldr r5, [%[a], #204]\n\t" + "ldr r6, [%[b], #200]\n\t" + "ldr r7, [%[b], #204]\n\t" +#else "ldrd r4, r5, [%[a], #200]\n\t" "ldrd r6, r7, [%[b], #200]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #200]\n\t" + "str r5, [%[r], #204]\n\t" +#else "strd r4, r5, [%[r], #200]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #208]\n\t" + "ldr r5, [%[a], #212]\n\t" + "ldr r6, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" +#else "ldrd r4, r5, [%[a], #208]\n\t" "ldrd r6, r7, [%[b], #208]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #208]\n\t" + "str r5, [%[r], #212]\n\t" +#else "strd r4, r5, [%[r], #208]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #216]\n\t" + "ldr r5, [%[a], #220]\n\t" + "ldr r6, [%[b], #216]\n\t" + "ldr r7, [%[b], #220]\n\t" +#else "ldrd r4, r5, [%[a], #216]\n\t" "ldrd r6, r7, [%[b], #216]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #216]\n\t" + "str r5, [%[r], #220]\n\t" +#else "strd r4, r5, [%[r], #216]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #224]\n\t" + "ldr r5, [%[a], #228]\n\t" + "ldr r6, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" +#else "ldrd r4, r5, [%[a], #224]\n\t" "ldrd r6, r7, [%[b], #224]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #224]\n\t" + "str r5, [%[r], #228]\n\t" +#else "strd r4, r5, [%[r], #224]\n\t" +#endif +#if 
defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #232]\n\t" + "ldr r5, [%[a], #236]\n\t" + "ldr r6, [%[b], #232]\n\t" + "ldr r7, [%[b], #236]\n\t" +#else "ldrd r4, r5, [%[a], #232]\n\t" "ldrd r6, r7, [%[b], #232]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #232]\n\t" + "str r5, [%[r], #236]\n\t" +#else "strd r4, r5, [%[r], #232]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #240]\n\t" + "ldr r5, [%[a], #244]\n\t" + "ldr r6, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" +#else "ldrd r4, r5, [%[a], #240]\n\t" "ldrd r6, r7, [%[b], #240]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #240]\n\t" + "str r5, [%[r], #244]\n\t" +#else "strd r4, r5, [%[r], #240]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #248]\n\t" + "ldr r5, [%[a], #252]\n\t" + "ldr r6, [%[b], #248]\n\t" + "ldr r7, [%[b], #252]\n\t" +#else "ldrd r4, r5, [%[a], #248]\n\t" "ldrd r6, r7, [%[b], #248]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #248]\n\t" + "str r5, [%[r], #252]\n\t" +#else "strd r4, r5, [%[r], #248]\n\t" +#endif "adc %[c], r8, r8\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -27680,10 +28599,21 @@ static void sp_256_sqr_8(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -30547,8 +31477,15 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) { __asm__ __volatile__ ( "mov r10, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[a], #8]\n\t" + "ldr r7, [%[a], #12]\n\t" +#else "ldrd r4, r5, [%[a], #0]\n\t" "ldrd r6, r7, [%[a], #8]\n\t" +#endif "and r14, r4, #1\n\t" "sub r8, r10, r14\n\t" "and r9, r8, #1\n\t" @@ -30556,10 +31493,24 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "adcs r5, r5, r8\n\t" "adcs r6, r6, r8\n\t" "adcs r7, r7, r10\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" + "str r6, [%[r], #8]\n\t" + "str r7, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #0]\n\t" "strd r6, r7, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[a], #24]\n\t" +#endif "adcs r4, r4, r10\n\t" "adcs r5, r5, r10\n\t" "adcs r6, r6, r9\n\t" @@ -30574,10 +31525,24 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "orr r10, r10, 
r7, lsl #31\n\t" "orr r11, r11, r14, lsl #31\n\t" "mov r14, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else "strd r8, r9, [%[r], #16]\n\t" "strd r10, r11, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #0]\n\t" + "ldr r5, [%[r], #4]\n\t" + "ldr r6, [%[r], #8]\n\t" + "ldr r7, [%[r], #12]\n\t" +#else "ldrd r4, r5, [%[r], #0]\n\t" "ldrd r6, r7, [%[r], #8]\n\t" +#endif "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" @@ -30586,8 +31551,15 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) "orr r9, r9, r6, lsl #31\n\t" "orr r10, r10, r7, lsl #31\n\t" "orr r11, r11, r14, lsl #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #0]\n\t" + "str r9, [%[r], #4]\n\t" + "str r10, [%[r], #8]\n\t" + "str r11, [%[r], #12]\n\t" +#else "strd r8, r9, [%[r], #0]\n\t" "strd r10, r11, [%[r], #8]\n\t" +#endif : : [r] "r" (r), [a] "r" (a), [m] "r" (m) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" @@ -35214,8 +36186,15 @@ static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) __asm__ __volatile__ ( "mov r10, #0\n\t" "mov r14, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[a], #24]\n\t" + "ldr r7, [%[a], #28]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[a], #24]\n\t" +#endif "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" @@ -35225,10 +36204,24 @@ static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) "orr r10, r10, r7, lsl #31\n\t" "orr r11, r11, r14, lsl #31\n\t" "mov r14, r4\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #16]\n\t" + "str r9, [%[r], #20]\n\t" + "str r10, [%[r], #24]\n\t" + "str r11, [%[r], #28]\n\t" +#else "strd r8, r9, [%[r], #16]\n\t" "strd r10, r11, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[r], #0]\n\t" + "ldr r5, [%[r], #4]\n\t" + "ldr r6, [%[r], #8]\n\t" + "ldr r7, [%[r], #12]\n\t" +#else "ldrd r4, r5, [%[r], #0]\n\t" "ldrd r6, r7, [%[r], #8]\n\t" +#endif "lsr r8, r4, #1\n\t" "lsr r9, r5, #1\n\t" "lsr r10, r6, #1\n\t" @@ -35237,8 +36230,15 @@ static void sp_256_rshift1_8(sp_digit* r, const sp_digit* a) "orr r9, r9, r6, lsl #31\n\t" "orr r10, r10, r7, lsl #31\n\t" "orr r11, r11, r14, lsl #31\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r8, [%[r], #0]\n\t" + "str r9, [%[r], #4]\n\t" + "str r10, [%[r], #8]\n\t" + "str r11, [%[r], #12]\n\t" +#else "strd r8, r9, [%[r], #0]\n\t" "strd r10, r11, [%[r], #8]\n\t" +#endif : : [r] "r" (r), [a] "r" (a) : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r14" @@ -35331,6 +36331,314 @@ static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, ); } +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) +static int sp_256_num_bits_8(sp_digit* a) +{ + static const byte sp_num_bits_table[256] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + }; + const byte* table = sp_num_bits_table; + int r = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #28]\n\t" + "cmp r2, #0\n\t" + "beq 7f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 307f\n\t" + "mov r3, #248\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n307:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 207f\n\t" + "mov r3, #240\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n207:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 107f\n\t" + "mov r3, #232\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n107:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #224\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n7:\n\t" + "ldr r2, [%[a], #24]\n\t" + "cmp r2, #0\n\t" + "beq 6f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 306f\n\t" + "mov r3, #216\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n306:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 206f\n\t" + "mov r3, #208\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n206:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 106f\n\t" + "mov r3, #200\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n106:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #192\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n6:\n\t" + "ldr r2, [%[a], #20]\n\t" + "cmp r2, #0\n\t" + "beq 5f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 305f\n\t" + "mov r3, #184\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n305:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 205f\n\t" + "mov r3, #176\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n205:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 105f\n\t" + "mov r3, #168\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n105:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #160\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n5:\n\t" + "ldr r2, [%[a], #16]\n\t" + "cmp r2, #0\n\t" + "beq 4f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 304f\n\t" + "mov r3, #152\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n304:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 204f\n\t" + "mov r3, #144\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n204:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 104f\n\t" + "mov r3, #136\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n104:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #128\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n4:\n\t" + "ldr r2, [%[a], 
#12]\n\t" + "cmp r2, #0\n\t" + "beq 3f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 303f\n\t" + "mov r3, #120\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n303:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 203f\n\t" + "mov r3, #112\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n203:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 103f\n\t" + "mov r3, #104\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n103:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #96\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n3:\n\t" + "ldr r2, [%[a], #8]\n\t" + "cmp r2, #0\n\t" + "beq 2f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 302f\n\t" + "mov r3, #88\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n302:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 202f\n\t" + "mov r3, #80\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n202:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 102f\n\t" + "mov r3, #72\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n102:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #64\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n2:\n\t" + "ldr r2, [%[a], #4]\n\t" + "cmp r2, #0\n\t" + "beq 1f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 301f\n\t" + "mov r3, #56\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n301:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 201f\n\t" + "mov r3, #48\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n201:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 101f\n\t" + "mov r3, #40\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n101:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #32\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n1:\n\t" + "ldr r2, [%[a], #0]\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 300f\n\t" + "mov r3, #24\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n300:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 200f\n\t" + "mov r3, #16\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n200:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 100f\n\t" + "mov r3, #8\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 9f\n\t" + "\n100:\n\t" + "and r4, r2, #0xff\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "\n9:\n\t" + : [r] "+r" (r) + : [a] "r" (a), [table] "r" (table) + : "r2", "r3", "r4" + ); + + return r; +} +#else static int sp_256_num_bits_8(sp_digit* a) { int r = 0; @@ -35404,6 +36712,7 @@ static int sp_256_num_bits_8(sp_digit* a) return r; } +#endif /* Non-constant time modular inversion. 
* @@ -37609,10 +38918,21 @@ static void sp_384_sqr_12(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -39460,48 +40780,120 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi __asm__ __volatile__ ( "mov r8, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" +#else "ldrd r4, r5, [%[a], #0]\n\t" "ldrd r6, r7, [%[b], #0]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r], #0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" "ldrd r6, r7, [%[b], #8]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[b], #16]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" +#else "strd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" +#else "ldrd r4, r5, [%[a], #24]\n\t" "ldrd r6, r7, [%[b], #24]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" +#else "strd r4, r5, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" +#else "ldrd r4, r5, [%[a], #32]\n\t" "ldrd r6, r7, [%[b], #32]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" +#else "strd r4, r5, [%[r], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" +#else "ldrd r4, r5, [%[a], #40]\n\t" "ldrd r6, r7, [%[b], #40]\n\t" +#endif "and r6, r6, %[m]\n\t" 
"and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" +#else "strd r4, r5, [%[r], #40]\n\t" +#endif "adc %[c], r8, r8\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -39532,7 +40924,12 @@ static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" @@ -44429,6 +45826,454 @@ static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, ); } +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) +static int sp_384_num_bits_12(sp_digit* a) +{ + static const byte sp_num_bits_table[256] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + }; + const byte* table = sp_num_bits_table; + int r = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #44]\n\t" + "cmp r2, #0\n\t" + "beq 11f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 311f\n\t" + "mov r3, #376\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n311:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 211f\n\t" + "mov r3, #368\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n211:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 111f\n\t" + "mov r3, #360\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n111:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #352\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n11:\n\t" + "ldr r2, [%[a], #40]\n\t" + "cmp r2, #0\n\t" + "beq 10f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 310f\n\t" + "mov r3, #344\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n310:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 210f\n\t" + "mov r3, #336\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n210:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 110f\n\t" + "mov r3, #328\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n110:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #320\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n10:\n\t" + "ldr r2, [%[a], #36]\n\t" + "cmp r2, #0\n\t" + "beq 9f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 309f\n\t" + "mov r3, #312\n\t" + 
"ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n309:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 209f\n\t" + "mov r3, #304\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n209:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 109f\n\t" + "mov r3, #296\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n109:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #288\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n9:\n\t" + "ldr r2, [%[a], #32]\n\t" + "cmp r2, #0\n\t" + "beq 8f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 308f\n\t" + "mov r3, #280\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n308:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 208f\n\t" + "mov r3, #272\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n208:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 108f\n\t" + "mov r3, #264\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n108:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #256\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n8:\n\t" + "ldr r2, [%[a], #28]\n\t" + "cmp r2, #0\n\t" + "beq 7f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 307f\n\t" + "mov r3, #248\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n307:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 207f\n\t" + "mov r3, #240\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n207:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 107f\n\t" + "mov r3, #232\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n107:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #224\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n7:\n\t" + "ldr r2, [%[a], #24]\n\t" + "cmp r2, #0\n\t" + "beq 6f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 306f\n\t" + "mov r3, #216\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n306:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 206f\n\t" + "mov r3, #208\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n206:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 106f\n\t" + "mov r3, #200\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n106:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #192\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n6:\n\t" + "ldr r2, [%[a], #20]\n\t" + "cmp r2, #0\n\t" + "beq 5f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 305f\n\t" + "mov r3, #184\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n305:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 205f\n\t" + "mov r3, #176\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n205:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 105f\n\t" + "mov r3, #168\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n105:\n\t" + "and r4, r2, #0xff\n\t" + 
"mov r3, #160\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n5:\n\t" + "ldr r2, [%[a], #16]\n\t" + "cmp r2, #0\n\t" + "beq 4f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 304f\n\t" + "mov r3, #152\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n304:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 204f\n\t" + "mov r3, #144\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n204:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 104f\n\t" + "mov r3, #136\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n104:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #128\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n4:\n\t" + "ldr r2, [%[a], #12]\n\t" + "cmp r2, #0\n\t" + "beq 3f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 303f\n\t" + "mov r3, #120\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n303:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 203f\n\t" + "mov r3, #112\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n203:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 103f\n\t" + "mov r3, #104\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n103:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #96\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n3:\n\t" + "ldr r2, [%[a], #8]\n\t" + "cmp r2, #0\n\t" + "beq 2f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 302f\n\t" + "mov r3, #88\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n302:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 202f\n\t" + "mov r3, #80\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n202:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 102f\n\t" + "mov r3, #72\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n102:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #64\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n2:\n\t" + "ldr r2, [%[a], #4]\n\t" + "cmp r2, #0\n\t" + "beq 1f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 301f\n\t" + "mov r3, #56\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n301:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 201f\n\t" + "mov r3, #48\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n201:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 101f\n\t" + "mov r3, #40\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n101:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #32\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n1:\n\t" + "ldr r2, [%[a], #0]\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 300f\n\t" + "mov r3, #24\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n300:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 200f\n\t" + "mov r3, #16\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n200:\n\t" + "lsr r4, r2, 
#8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 100f\n\t" + "mov r3, #8\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 13f\n\t" + "\n100:\n\t" + "and r4, r2, #0xff\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "\n13:\n\t" + : [r] "+r" (r) + : [a] "r" (a), [table] "r" (table) + : "r2", "r3", "r4" + ); + + return r; +} +#else static int sp_384_num_bits_12(sp_digit* a) { int r = 0; @@ -44534,6 +46379,7 @@ static int sp_384_num_bits_12(sp_digit* a) return r; } +#endif /* Non-constant time modular inversion. * @@ -47803,10 +49649,21 @@ static void sp_521_sqr_17(sp_digit* r, const sp_digit* a) "add %[r], %[r], #8\n\t" "subs r5, r5, #8\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -50800,7 +52657,12 @@ static void sp_521_mont_sub_17(sp_digit* r, const sp_digit* a, const sp_digit* b static void sp_521_rshift1_17(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" @@ -55329,7 +57191,12 @@ static void sp_521_rshift_17(sp_digit* r, const sp_digit* a, byte n) { __asm__ __volatile__ ( "rsb r6, %[n], #32\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif "lsr r2, r2, %[n]\n\t" "lsl r5, r3, r6\n\t" "lsr r3, r3, %[n]\n\t" @@ -56897,6 +58764,629 @@ static void sp_521_div2_mod_17(sp_digit* r, const sp_digit* a, ); } +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) +static int sp_521_num_bits_17(sp_digit* a) +{ + static const byte sp_num_bits_table[256] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + }; + const byte* table = sp_num_bits_table; + int r = 0; + + __asm__ __volatile__ ( + "ldr r2, [%[a], #64]\n\t" + "cmp r2, #0\n\t" + "beq 16f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 316f\n\t" + "mov r3, #536\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n316:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 216f\n\t" + "mov r3, #528\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n216:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, 
#0xff\n\t" + "cmp r4, #0\n\t" + "beq 116f\n\t" + "mov r3, #520\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n116:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #512\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n16:\n\t" + "ldr r2, [%[a], #60]\n\t" + "cmp r2, #0\n\t" + "beq 15f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 315f\n\t" + "mov r3, #504\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n315:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 215f\n\t" + "mov r3, #496\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n215:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 115f\n\t" + "mov r3, #488\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n115:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #480\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n15:\n\t" + "ldr r2, [%[a], #56]\n\t" + "cmp r2, #0\n\t" + "beq 14f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 314f\n\t" + "mov r3, #472\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n314:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 214f\n\t" + "mov r3, #464\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n214:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 114f\n\t" + "mov r3, #456\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n114:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #448\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n14:\n\t" + "ldr r2, [%[a], #52]\n\t" + "cmp r2, #0\n\t" + "beq 13f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 313f\n\t" + "mov r3, #440\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n313:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 213f\n\t" + "mov r3, #432\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n213:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 113f\n\t" + "mov r3, #424\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n113:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #416\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n13:\n\t" + "ldr r2, [%[a], #48]\n\t" + "cmp r2, #0\n\t" + "beq 12f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 312f\n\t" + "mov r3, #408\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n312:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 212f\n\t" + "mov r3, #400\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n212:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 112f\n\t" + "mov r3, #392\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n112:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #384\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n12:\n\t" + "ldr r2, [%[a], #44]\n\t" + "cmp r2, #0\n\t" + "beq 11f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 311f\n\t" + "mov r3, #376\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + 
"b 18f\n\t" + "\n311:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 211f\n\t" + "mov r3, #368\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n211:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 111f\n\t" + "mov r3, #360\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n111:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #352\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n11:\n\t" + "ldr r2, [%[a], #40]\n\t" + "cmp r2, #0\n\t" + "beq 10f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 310f\n\t" + "mov r3, #344\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n310:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 210f\n\t" + "mov r3, #336\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n210:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 110f\n\t" + "mov r3, #328\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n110:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #320\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n10:\n\t" + "ldr r2, [%[a], #36]\n\t" + "cmp r2, #0\n\t" + "beq 9f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 309f\n\t" + "mov r3, #312\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n309:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 209f\n\t" + "mov r3, #304\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n209:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 109f\n\t" + "mov r3, #296\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n109:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #288\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n9:\n\t" + "ldr r2, [%[a], #32]\n\t" + "cmp r2, #0\n\t" + "beq 8f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 308f\n\t" + "mov r3, #280\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n308:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 208f\n\t" + "mov r3, #272\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n208:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 108f\n\t" + "mov r3, #264\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n108:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #256\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n8:\n\t" + "ldr r2, [%[a], #28]\n\t" + "cmp r2, #0\n\t" + "beq 7f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 307f\n\t" + "mov r3, #248\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n307:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 207f\n\t" + "mov r3, #240\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n207:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 107f\n\t" + "mov r3, #232\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n107:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #224\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add 
%[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n7:\n\t" + "ldr r2, [%[a], #24]\n\t" + "cmp r2, #0\n\t" + "beq 6f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 306f\n\t" + "mov r3, #216\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n306:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 206f\n\t" + "mov r3, #208\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n206:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 106f\n\t" + "mov r3, #200\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n106:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #192\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n6:\n\t" + "ldr r2, [%[a], #20]\n\t" + "cmp r2, #0\n\t" + "beq 5f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 305f\n\t" + "mov r3, #184\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n305:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 205f\n\t" + "mov r3, #176\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n205:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 105f\n\t" + "mov r3, #168\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n105:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #160\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n5:\n\t" + "ldr r2, [%[a], #16]\n\t" + "cmp r2, #0\n\t" + "beq 4f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 304f\n\t" + "mov r3, #152\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n304:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 204f\n\t" + "mov r3, #144\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n204:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 104f\n\t" + "mov r3, #136\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n104:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #128\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n4:\n\t" + "ldr r2, [%[a], #12]\n\t" + "cmp r2, #0\n\t" + "beq 3f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 303f\n\t" + "mov r3, #120\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n303:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 203f\n\t" + "mov r3, #112\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n203:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 103f\n\t" + "mov r3, #104\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n103:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #96\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n3:\n\t" + "ldr r2, [%[a], #8]\n\t" + "cmp r2, #0\n\t" + "beq 2f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 302f\n\t" + "mov r3, #88\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n302:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 202f\n\t" + "mov r3, #80\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n202:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, 
#0xff\n\t" + "cmp r4, #0\n\t" + "beq 102f\n\t" + "mov r3, #72\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n102:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #64\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n2:\n\t" + "ldr r2, [%[a], #4]\n\t" + "cmp r2, #0\n\t" + "beq 1f\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 301f\n\t" + "mov r3, #56\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n301:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 201f\n\t" + "mov r3, #48\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n201:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 101f\n\t" + "mov r3, #40\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n101:\n\t" + "and r4, r2, #0xff\n\t" + "mov r3, #32\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n1:\n\t" + "ldr r2, [%[a], #0]\n\t" + "lsr r4, r2, #24\n\t" + "cmp r4, #0\n\t" + "beq 300f\n\t" + "mov r3, #24\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n300:\n\t" + "lsr r4, r2, #16\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 200f\n\t" + "mov r3, #16\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n200:\n\t" + "lsr r4, r2, #8\n\t" + "and r4, r4, #0xff\n\t" + "cmp r4, #0\n\t" + "beq 100f\n\t" + "mov r3, #8\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "add %[r], r3, %[r]\n\t" + "b 18f\n\t" + "\n100:\n\t" + "and r4, r2, #0xff\n\t" + "ldrb %[r], [%[table], r4]\n\t" + "\n18:\n\t" + : [r] "+r" (r) + : [a] "r" (a), [table] "r" (table) + : "r2", "r3", "r4" + ); + + return r; +} +#else static int sp_521_num_bits_17(sp_digit* a) { int r = 0; @@ -57042,6 +59532,7 @@ static int sp_521_num_bits_17(sp_digit* a) return r; } +#endif /* Non-constant time modular inversion. 
* @@ -61435,10 +63926,21 @@ static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a) "ble 1b\n\t" "str r6, [sp, r5]\n\t" "\n4:\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r6, [sp, #0]\n\t" + "ldr r7, [sp, #4]\n\t" + "ldr r8, [sp, #8]\n\t" + "ldr r9, [sp, #12]\n\t" + "str r6, [%[r], #0]\n\t" + "str r7, [%[r], #4]\n\t" + "str r8, [%[r], #8]\n\t" + "str r9, [%[r], #12]\n\t" +#else "ldrd r6, r7, [sp, #0]\n\t" "ldrd r8, r9, [sp, #8]\n\t" "strd r6, r7, [%[r], #0]\n\t" "strd r8, r9, [%[r], #8]\n\t" +#endif "add sp, sp, #16\n\t" "add %[r], %[r], #16\n\t" "subs r5, r5, #16\n\t" @@ -64160,118 +66662,310 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig __asm__ __volatile__ ( "mov r8, #0\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #0]\n\t" + "ldr r5, [%[a], #4]\n\t" + "ldr r6, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" +#else "ldrd r4, r5, [%[a], #0]\n\t" "ldrd r6, r7, [%[b], #0]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adds r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #0]\n\t" + "str r5, [%[r], #4]\n\t" +#else "strd r4, r5, [%[r], #0]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #8]\n\t" + "ldr r5, [%[a], #12]\n\t" + "ldr r6, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" +#else "ldrd r4, r5, [%[a], #8]\n\t" "ldrd r6, r7, [%[b], #8]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #8]\n\t" + "str r5, [%[r], #12]\n\t" +#else "strd r4, r5, [%[r], #8]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #16]\n\t" + "ldr r5, [%[a], #20]\n\t" + "ldr r6, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" +#else "ldrd r4, r5, [%[a], #16]\n\t" "ldrd r6, r7, [%[b], #16]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #16]\n\t" + "str r5, [%[r], #20]\n\t" +#else "strd r4, r5, [%[r], #16]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #24]\n\t" + "ldr r5, [%[a], #28]\n\t" + "ldr r6, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" +#else "ldrd r4, r5, [%[a], #24]\n\t" "ldrd r6, r7, [%[b], #24]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #24]\n\t" + "str r5, [%[r], #28]\n\t" +#else "strd r4, r5, [%[r], #24]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #32]\n\t" + "ldr r5, [%[a], #36]\n\t" + "ldr r6, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" +#else "ldrd r4, r5, [%[a], #32]\n\t" "ldrd r6, r7, [%[b], #32]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #32]\n\t" + "str r5, [%[r], #36]\n\t" +#else "strd r4, r5, [%[r], #32]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #40]\n\t" + "ldr r5, [%[a], #44]\n\t" + "ldr r6, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" +#else "ldrd r4, r5, [%[a], #40]\n\t" "ldrd r6, r7, [%[b], #40]\n\t" +#endif "and r6, r6, %[m]\n\t" 
"and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #40]\n\t" + "str r5, [%[r], #44]\n\t" +#else "strd r4, r5, [%[r], #40]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #48]\n\t" + "ldr r5, [%[a], #52]\n\t" + "ldr r6, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" +#else "ldrd r4, r5, [%[a], #48]\n\t" "ldrd r6, r7, [%[b], #48]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #48]\n\t" + "str r5, [%[r], #52]\n\t" +#else "strd r4, r5, [%[r], #48]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #56]\n\t" + "ldr r5, [%[a], #60]\n\t" + "ldr r6, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" +#else "ldrd r4, r5, [%[a], #56]\n\t" "ldrd r6, r7, [%[b], #56]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #56]\n\t" + "str r5, [%[r], #60]\n\t" +#else "strd r4, r5, [%[r], #56]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #64]\n\t" + "ldr r5, [%[a], #68]\n\t" + "ldr r6, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" +#else "ldrd r4, r5, [%[a], #64]\n\t" "ldrd r6, r7, [%[b], #64]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #64]\n\t" + "str r5, [%[r], #68]\n\t" +#else "strd r4, r5, [%[r], #64]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #72]\n\t" + "ldr r5, [%[a], #76]\n\t" + "ldr r6, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" +#else "ldrd r4, r5, [%[a], #72]\n\t" "ldrd r6, r7, [%[b], #72]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #72]\n\t" + "str r5, [%[r], #76]\n\t" +#else "strd r4, r5, [%[r], #72]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #80]\n\t" + "ldr r5, [%[a], #84]\n\t" + "ldr r6, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" +#else "ldrd r4, r5, [%[a], #80]\n\t" "ldrd r6, r7, [%[b], #80]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #80]\n\t" + "str r5, [%[r], #84]\n\t" +#else "strd r4, r5, [%[r], #80]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #88]\n\t" + "ldr r5, [%[a], #92]\n\t" + "ldr r6, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" +#else "ldrd r4, r5, [%[a], #88]\n\t" "ldrd r6, r7, [%[b], #88]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #88]\n\t" + "str r5, [%[r], #92]\n\t" +#else "strd r4, r5, [%[r], #88]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #96]\n\t" + "ldr r5, [%[a], #100]\n\t" + "ldr r6, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" +#else "ldrd r4, r5, [%[a], #96]\n\t" "ldrd r6, r7, [%[b], #96]\n\t" +#endif "and r6, r6, %[m]\n\t" 
"and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #96]\n\t" + "str r5, [%[r], #100]\n\t" +#else "strd r4, r5, [%[r], #96]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #104]\n\t" + "ldr r5, [%[a], #108]\n\t" + "ldr r6, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" +#else "ldrd r4, r5, [%[a], #104]\n\t" "ldrd r6, r7, [%[b], #104]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #104]\n\t" + "str r5, [%[r], #108]\n\t" +#else "strd r4, r5, [%[r], #104]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #112]\n\t" + "ldr r5, [%[a], #116]\n\t" + "ldr r6, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" +#else "ldrd r4, r5, [%[a], #112]\n\t" "ldrd r6, r7, [%[b], #112]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #112]\n\t" + "str r5, [%[r], #116]\n\t" +#else "strd r4, r5, [%[r], #112]\n\t" +#endif +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r4, [%[a], #120]\n\t" + "ldr r5, [%[a], #124]\n\t" + "ldr r6, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" +#else "ldrd r4, r5, [%[a], #120]\n\t" "ldrd r6, r7, [%[b], #120]\n\t" +#endif "and r6, r6, %[m]\n\t" "and r7, r7, %[m]\n\t" "adcs r4, r4, r6\n\t" "adcs r5, r5, r7\n\t" +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "str r4, [%[r], #120]\n\t" + "str r5, [%[r], #124]\n\t" +#else "strd r4, r5, [%[r], #120]\n\t" +#endif "adc %[c], r8, r8\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -64285,7 +66979,12 @@ static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_dig static void sp_1024_rshift1_32(sp_digit* r, const sp_digit* a) { __asm__ __volatile__ ( +#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7) + "ldr r2, [%[a]]\n\t" + "ldr r3, [%[a], #4]\n\t" +#else "ldrd r2, r3, [%[a]]\n\t" +#endif "lsr r2, r2, #1\n\t" "orr r2, r2, r3, lsl #31\n\t" "lsr r3, r3, #1\n\t" diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index dc2cfd7c4..63afab613 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -20,7 +20,7 @@ */ #ifdef WOLFSSL_USER_SETTINGS -#include "wolfssl/wolfcrpyt/settings.h" +#include "wolfssl/wolfcrypt/settings.h" #endif #ifndef HAVE_INTEL_AVX1