From 6321eabf86a30210adb3856049767dd3fbf79e81 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 12 Mar 2020 09:33:52 +1000 Subject: [PATCH] Fix SP RSA private op tmpa - tmpb can be less than -p. Need to conditionally add p twice. C and multiple platform fix. --- wolfcrypt/src/sp_arm32.c | 1037 +++++++++++++++++++++++--- wolfcrypt/src/sp_arm64.c | 1373 +++++++++++++++++++---------------- wolfcrypt/src/sp_armthumb.c | 184 ++++- wolfcrypt/src/sp_c32.c | 157 +--- wolfcrypt/src/sp_c64.c | 159 +--- wolfcrypt/src/sp_cortexm.c | 161 +++- wolfcrypt/src/sp_x86_64.c | 12 +- 7 files changed, 2047 insertions(+), 1036 deletions(-) diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index ef47c8f7b..9692dd4f5 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -7486,6 +7486,212 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #128\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and 
r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "adc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
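The hunks that follow change the CRT recombination in sp_RsaPrivate_2048. tmpa holds the exponentiation result modulo p (so it is below p) and tmpb the result modulo q (below q). When p and q have the same bit length, q may exceed p but is still below 2p, so tmpa - tmpb lies strictly between -2p and p. The old code masked p with the borrow and added it once, which is not enough when the difference falls below -p; the fix adds p conditionally twice, folding the carry of the first add back into the mask so the second add only fires while the value is still negative. A minimal portable sketch of the same pattern, assuming 32-bit digits, a generic digit count n, and hypothetical helper names (sub_in_place, cond_add, crt_sub_mod are not the wolfSSL API):

#include <stdint.h>

typedef uint32_t sp_digit;

/* a -= b over n digits; returns 0 on no borrow, all-ones on borrow,
 * matching the sp_*_sub_in_place_* convention in this file. */
static sp_digit sub_in_place(sp_digit* a, const sp_digit* b, int n)
{
    sp_digit borrow = 0;
    for (int i = 0; i < n; i++) {
        uint64_t d = (uint64_t)a[i] - b[i] - borrow;
        a[i] = (sp_digit)d;
        borrow = (sp_digit)((d >> 32) & 1);   /* 1 when this digit borrowed */
    }
    return (sp_digit)0 - borrow;              /* 0 or 0xFFFFFFFF */
}

/* r = a + (b & m) over n digits; m is 0 (no-op) or all-ones (add).
 * Branch-free; returns the carry out of the top digit. */
static sp_digit cond_add(sp_digit* r, const sp_digit* a, const sp_digit* b,
                         sp_digit m, int n)
{
    uint64_t t = 0;
    for (int i = 0; i < n; i++) {
        t += (uint64_t)a[i] + (b[i] & m);
        r[i] = (sp_digit)t;
        t >>= 32;
    }
    return (sp_digit)t;
}

/* tmpa = (tmpa - tmpb) mod p, tolerating a difference as low as -2p + 1. */
static void crt_sub_mod(sp_digit* tmpa, const sp_digit* tmpb,
                        const sp_digit* p, int n)
{
    sp_digit c = sub_in_place(tmpa, tmpb, n);  /* all-ones if result negative */
    c += cond_add(tmpa, tmpa, p, c, n);        /* add p once; a carry clears the mask */
    cond_add(tmpa, tmpa, p, c, n);             /* add p again only if still negative */
}

The assembly routine sp_2048_cond_add_32 above plays the role of cond_add here: the mask is ANDed into every digit of p before the carry chain, so the add happens without a data-dependent branch.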
@@ -7520,7 +7726,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -7550,8 +7755,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 32; tmpb = tmpa + 64; - tmp = t; - r = tmp + 64; + r = t + 64; } #else r = a = ad; @@ -7560,7 +7764,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 64; #endif if (err == MP_OKAY) { @@ -7578,8 +7781,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_2048_sub_in_place_32(tmpa, tmpb); - sp_2048_mask_32(tmp, p, c); - sp_2048_add_32(tmpa, tmpa, tmp); + c += sp_2048_cond_add_32(tmpa, tmpa, p, c); + sp_2048_cond_add_32(tmpa, tmpa, p, c); sp_2048_from_mp(qi, 32, qim); sp_2048_mul_32(tmpa, tmpa, qi); @@ -7610,7 +7813,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, return err; } -#endif +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -18769,6 +18972,292 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #192\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + 
"str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, [%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr 
r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "adc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -18803,7 +19292,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -18833,8 +19321,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 48; tmpb = tmpa + 96; - tmp = t; - r = tmp + 96; + r = t + 96; } #else r = a = ad; @@ -18843,7 +19330,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 96; #endif if (err == MP_OKAY) { @@ -18861,8 +19347,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_3072_sub_in_place_48(tmpa, tmpb); - sp_3072_mask_48(tmp, p, c); - sp_3072_add_48(tmpa, tmpa, tmp); + c += sp_3072_cond_add_48(tmpa, tmpa, p, c); + sp_3072_cond_add_48(tmpa, tmpa, p, c); sp_3072_from_mp(qi, 48, qim); sp_3072_mul_48(tmpa, tmpa, qi); @@ -18893,7 +19379,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, return err; } -#endif +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -71046,6 +71532,372 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov r9, #0\n\t" + "mov r8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #256\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], #4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "ldr r4, [%[a], #48]\n\t" + "ldr r6, [%[a], #52]\n\t" + "ldr r5, [%[b], #48]\n\t" + "ldr r7, [%[b], #52]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #48]\n\t" + "str r6, [%[r], #52]\n\t" + "ldr r4, [%[a], #56]\n\t" + "ldr r6, [%[a], #60]\n\t" + "ldr r5, [%[b], #56]\n\t" + "ldr r7, [%[b], #60]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #56]\n\t" + "str r6, [%[r], #60]\n\t" + "ldr r4, [%[a], #64]\n\t" + "ldr r6, [%[a], #68]\n\t" + "ldr r5, [%[b], #64]\n\t" + "ldr r7, [%[b], #68]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #64]\n\t" + "str r6, [%[r], #68]\n\t" + "ldr r4, [%[a], #72]\n\t" + "ldr r6, [%[a], #76]\n\t" + "ldr r5, [%[b], #72]\n\t" + "ldr r7, [%[b], #76]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #72]\n\t" + "str r6, [%[r], #76]\n\t" + "ldr r4, [%[a], #80]\n\t" + "ldr r6, [%[a], #84]\n\t" + "ldr r5, [%[b], #80]\n\t" + "ldr r7, [%[b], #84]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #80]\n\t" + "str r6, 
[%[r], #84]\n\t" + "ldr r4, [%[a], #88]\n\t" + "ldr r6, [%[a], #92]\n\t" + "ldr r5, [%[b], #88]\n\t" + "ldr r7, [%[b], #92]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #88]\n\t" + "str r6, [%[r], #92]\n\t" + "ldr r4, [%[a], #96]\n\t" + "ldr r6, [%[a], #100]\n\t" + "ldr r5, [%[b], #96]\n\t" + "ldr r7, [%[b], #100]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #96]\n\t" + "str r6, [%[r], #100]\n\t" + "ldr r4, [%[a], #104]\n\t" + "ldr r6, [%[a], #108]\n\t" + "ldr r5, [%[b], #104]\n\t" + "ldr r7, [%[b], #108]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #104]\n\t" + "str r6, [%[r], #108]\n\t" + "ldr r4, [%[a], #112]\n\t" + "ldr r6, [%[a], #116]\n\t" + "ldr r5, [%[b], #112]\n\t" + "ldr r7, [%[b], #116]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #112]\n\t" + "str r6, [%[r], #116]\n\t" + "ldr r4, [%[a], #120]\n\t" + "ldr r6, [%[a], #124]\n\t" + "ldr r5, [%[b], #120]\n\t" + "ldr r7, [%[b], #124]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #120]\n\t" + "str r6, [%[r], #124]\n\t" + "ldr r4, [%[a], #128]\n\t" + "ldr r6, [%[a], #132]\n\t" + "ldr r5, [%[b], #128]\n\t" + "ldr r7, [%[b], #132]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #128]\n\t" + "str r6, [%[r], #132]\n\t" + "ldr r4, [%[a], #136]\n\t" + "ldr r6, [%[a], #140]\n\t" + "ldr r5, [%[b], #136]\n\t" + "ldr r7, [%[b], #140]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #136]\n\t" + "str r6, [%[r], #140]\n\t" + "ldr r4, [%[a], #144]\n\t" + "ldr r6, [%[a], #148]\n\t" + "ldr r5, [%[b], #144]\n\t" + "ldr r7, [%[b], #148]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #144]\n\t" + "str r6, [%[r], #148]\n\t" + "ldr r4, [%[a], #152]\n\t" + "ldr r6, [%[a], #156]\n\t" + "ldr r5, [%[b], #152]\n\t" + "ldr r7, [%[b], #156]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #152]\n\t" + "str r6, [%[r], #156]\n\t" + "ldr r4, [%[a], #160]\n\t" + "ldr r6, [%[a], #164]\n\t" + "ldr r5, [%[b], #160]\n\t" + "ldr r7, [%[b], #164]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #160]\n\t" + "str r6, [%[r], #164]\n\t" + "ldr r4, [%[a], #168]\n\t" + "ldr r6, [%[a], #172]\n\t" + "ldr r5, [%[b], #168]\n\t" + "ldr r7, [%[b], #172]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #168]\n\t" + "str r6, [%[r], #172]\n\t" + "ldr r4, [%[a], #176]\n\t" + "ldr r6, [%[a], #180]\n\t" + "ldr r5, [%[b], #176]\n\t" + "ldr r7, [%[b], #180]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #176]\n\t" + "str r6, [%[r], #180]\n\t" + "ldr r4, [%[a], #184]\n\t" + "ldr r6, [%[a], #188]\n\t" + "ldr r5, [%[b], #184]\n\t" + "ldr r7, [%[b], #188]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #184]\n\t" + "str r6, [%[r], #188]\n\t" + "ldr r4, 
[%[a], #192]\n\t" + "ldr r6, [%[a], #196]\n\t" + "ldr r5, [%[b], #192]\n\t" + "ldr r7, [%[b], #196]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #192]\n\t" + "str r6, [%[r], #196]\n\t" + "ldr r4, [%[a], #200]\n\t" + "ldr r6, [%[a], #204]\n\t" + "ldr r5, [%[b], #200]\n\t" + "ldr r7, [%[b], #204]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #200]\n\t" + "str r6, [%[r], #204]\n\t" + "ldr r4, [%[a], #208]\n\t" + "ldr r6, [%[a], #212]\n\t" + "ldr r5, [%[b], #208]\n\t" + "ldr r7, [%[b], #212]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #208]\n\t" + "str r6, [%[r], #212]\n\t" + "ldr r4, [%[a], #216]\n\t" + "ldr r6, [%[a], #220]\n\t" + "ldr r5, [%[b], #216]\n\t" + "ldr r7, [%[b], #220]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #216]\n\t" + "str r6, [%[r], #220]\n\t" + "ldr r4, [%[a], #224]\n\t" + "ldr r6, [%[a], #228]\n\t" + "ldr r5, [%[b], #224]\n\t" + "ldr r7, [%[b], #228]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #224]\n\t" + "str r6, [%[r], #228]\n\t" + "ldr r4, [%[a], #232]\n\t" + "ldr r6, [%[a], #236]\n\t" + "ldr r5, [%[b], #232]\n\t" + "ldr r7, [%[b], #236]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #232]\n\t" + "str r6, [%[r], #236]\n\t" + "ldr r4, [%[a], #240]\n\t" + "ldr r6, [%[a], #244]\n\t" + "ldr r5, [%[b], #240]\n\t" + "ldr r7, [%[b], #244]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #240]\n\t" + "str r6, [%[r], #244]\n\t" + "ldr r4, [%[a], #248]\n\t" + "ldr r6, [%[a], #252]\n\t" + "ldr r5, [%[b], #248]\n\t" + "ldr r7, [%[b], #252]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #248]\n\t" + "str r6, [%[r], #252]\n\t" + "adc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
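The sp_RsaPrivate_4096 hunk below applies the same two-step pattern via c += sp_4096_cond_add_64(tmpa, tmpa, p, c). The reason the c += acts as a switch is ordinary two's-complement wrap-around: the mask is either 0 or all-ones, the conditional add returns its carry-out, and adding a carry of 1 to an all-ones mask turns it into 0, disabling the second add. A small self-contained check of that arithmetic (32-bit digits assumed for illustration; the real sp_digit width depends on the platform):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t mask = 0xFFFFFFFFu;   /* subtraction result was negative */

    uint32_t carry = 1;            /* first add of p carried: back in range */
    assert((uint32_t)(mask + carry) == 0u);           /* second add disabled */

    carry = 0;                     /* first add did not carry: still negative */
    assert((uint32_t)(mask + carry) == 0xFFFFFFFFu);  /* second add still enabled */

    return 0;
}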
@@ -71080,7 +71932,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -71110,8 +71961,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 64; tmpb = tmpa + 128; - tmp = t; - r = tmp + 128; + r = t + 128; } #else r = a = ad; @@ -71120,7 +71970,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 128; #endif if (err == MP_OKAY) { @@ -71138,8 +71987,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_4096_sub_in_place_64(tmpa, tmpb); - sp_4096_mask_64(tmp, p, c); - sp_4096_add_64(tmpa, tmpa, tmp); + c += sp_4096_cond_add_64(tmpa, tmpa, p, c); + sp_4096_cond_add_64(tmpa, tmpa, p, c); sp_4096_from_mp(qi, 64, qim); sp_4096_mul_64(tmpa, tmpa, qi); @@ -71170,7 +72019,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, return err; } -#endif +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -83497,73 +84346,95 @@ static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digi { sp_digit c = 0; +#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( + "mov r9, #0\n\t" "mov r8, #0\n\t" - "ldr r4, [%[a], #0]\n\t" - "ldr r6, [%[a], #4]\n\t" - "ldr r5, [%[b], #0]\n\t" - "ldr r7, [%[b], #4]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adds r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #0]\n\t" - "str r6, [%[r], #4]\n\t" - "ldr r4, [%[a], #8]\n\t" - "ldr r6, [%[a], #12]\n\t" - "ldr r5, [%[b], #8]\n\t" - "ldr r7, [%[b], #12]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #8]\n\t" - "str r6, [%[r], #12]\n\t" - "ldr r4, [%[a], #16]\n\t" - "ldr r6, [%[a], #20]\n\t" - "ldr r5, [%[b], #16]\n\t" - "ldr r7, [%[b], #20]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #16]\n\t" - "str r6, [%[r], #20]\n\t" - "ldr r4, [%[a], #24]\n\t" - "ldr r6, [%[a], #28]\n\t" - "ldr r5, [%[b], #24]\n\t" - "ldr r7, [%[b], #28]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #24]\n\t" - "str r6, [%[r], #28]\n\t" - "ldr r4, [%[a], #32]\n\t" - "ldr r6, [%[a], #36]\n\t" - "ldr r5, [%[b], #32]\n\t" - "ldr r7, [%[b], #36]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #32]\n\t" - "str r6, [%[r], #36]\n\t" - "ldr r4, [%[a], #40]\n\t" - "ldr r6, [%[a], #44]\n\t" - "ldr r5, [%[b], #40]\n\t" - "ldr r7, [%[b], #44]\n\t" - "and r5, r5, %[m]\n\t" - "and r7, r7, %[m]\n\t" - "adcs r4, r4, r5\n\t" - "adcs r6, r6, r7\n\t" - "str r4, [%[r], #40]\n\t" - "str r6, [%[r], #44]\n\t" - "adc %[c], r8, r8\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr r4, [%[a], r8]\n\t" + "ldr r5, [%[b], r8]\n\t" + "and r5, r5, %[m]\n\t" + "sbcs r4, r4, r5\n\t" + "sbc %[c], r9, r9\n\t" + "str r4, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, #48\n\t" + "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r4", "r6", "r5", "r7", "r8" + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" ); +#else + __asm__ __volatile__ ( + + "mov r9, #0\n\t" + "ldr r4, [%[a], #0]\n\t" + "ldr r6, [%[a], 
#4]\n\t" + "ldr r5, [%[b], #0]\n\t" + "ldr r7, [%[b], #4]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adds r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #0]\n\t" + "str r6, [%[r], #4]\n\t" + "ldr r4, [%[a], #8]\n\t" + "ldr r6, [%[a], #12]\n\t" + "ldr r5, [%[b], #8]\n\t" + "ldr r7, [%[b], #12]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #8]\n\t" + "str r6, [%[r], #12]\n\t" + "ldr r4, [%[a], #16]\n\t" + "ldr r6, [%[a], #20]\n\t" + "ldr r5, [%[b], #16]\n\t" + "ldr r7, [%[b], #20]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #16]\n\t" + "str r6, [%[r], #20]\n\t" + "ldr r4, [%[a], #24]\n\t" + "ldr r6, [%[a], #28]\n\t" + "ldr r5, [%[b], #24]\n\t" + "ldr r7, [%[b], #28]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #24]\n\t" + "str r6, [%[r], #28]\n\t" + "ldr r4, [%[a], #32]\n\t" + "ldr r6, [%[a], #36]\n\t" + "ldr r5, [%[b], #32]\n\t" + "ldr r7, [%[b], #36]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #32]\n\t" + "str r6, [%[r], #36]\n\t" + "ldr r4, [%[a], #40]\n\t" + "ldr r6, [%[a], #44]\n\t" + "ldr r5, [%[b], #40]\n\t" + "ldr r7, [%[b], #44]\n\t" + "and r5, r5, %[m]\n\t" + "and r7, r7, %[m]\n\t" + "adcs r4, r4, r5\n\t" + "adcs r6, r6, r7\n\t" + "str r4, [%[r], #40]\n\t" + "str r6, [%[r], #44]\n\t" + "adc %[c], r9, r9\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r4", "r6", "r5", "r7", "r8", "r9" + ); +#endif /* WOLFSSL_SP_SMALL */ return c; } diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 30e7d0ff5..dcee7ffdb 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -2275,86 +2275,62 @@ static sp_digit sp_2048_cond_sub_16(sp_digit* r, const sp_digit* a, const sp_dig #else __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "subs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" - "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" - "ldr x4, [%[a], 32]\n\t" - "ldr x6, [%[a], 40]\n\t" - "ldr x5, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 32]\n\t" - "str x6, [%[r], 40]\n\t" - "ldr x4, [%[a], 48]\n\t" - "ldr x6, [%[a], 56]\n\t" - "ldr x5, [%[b], 48]\n\t" - "ldr x7, [%[b], 56]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 48]\n\t" - "str x6, [%[r], 56]\n\t" - "ldr x4, [%[a], 64]\n\t" - "ldr x6, [%[a], 72]\n\t" - "ldr x5, [%[b], 64]\n\t" - "ldr x7, [%[b], 72]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, 
x7, [%[b], 64]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 64]\n\t" - "str x6, [%[r], 72]\n\t" - "ldr x4, [%[a], 80]\n\t" - "ldr x6, [%[a], 88]\n\t" - "ldr x5, [%[b], 80]\n\t" - "ldr x7, [%[b], 88]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 80]\n\t" - "str x6, [%[r], 88]\n\t" - "ldr x4, [%[a], 96]\n\t" - "ldr x6, [%[a], 104]\n\t" - "ldr x5, [%[b], 96]\n\t" - "ldr x7, [%[b], 104]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 96]\n\t" - "str x6, [%[r], 104]\n\t" - "ldr x4, [%[a], 112]\n\t" - "ldr x6, [%[a], 120]\n\t" - "ldr x5, [%[b], 112]\n\t" - "ldr x7, [%[b], 120]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 112]\n\t" - "str x6, [%[r], 120]\n\t" + "stp x4, x6, [%[r], 112]\n\t" "csetm %[c], cc\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -3388,166 +3364,118 @@ static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a, const sp_dig #else __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "subs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" - "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" - "ldr x4, [%[a], 32]\n\t" - "ldr x6, [%[a], 40]\n\t" - "ldr x5, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 32]\n\t" - "str x6, [%[r], 40]\n\t" - "ldr x4, [%[a], 48]\n\t" - "ldr x6, [%[a], 56]\n\t" - "ldr x5, [%[b], 48]\n\t" - "ldr x7, [%[b], 56]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 48]\n\t" - "str x6, [%[r], 56]\n\t" - "ldr x4, [%[a], 64]\n\t" - "ldr x6, [%[a], 72]\n\t" - "ldr x5, [%[b], 64]\n\t" - "ldr x7, [%[b], 72]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 64]\n\t" - "str x6, [%[r], 72]\n\t" - "ldr x4, [%[a], 80]\n\t" - "ldr x6, [%[a], 88]\n\t" - "ldr x5, [%[b], 80]\n\t" - "ldr x7, [%[b], 88]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 80]\n\t" - "str x6, [%[r], 88]\n\t" - "ldr x4, [%[a], 96]\n\t" - "ldr x6, [%[a], 104]\n\t" - 
"ldr x5, [%[b], 96]\n\t" - "ldr x7, [%[b], 104]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 96]\n\t" - "str x6, [%[r], 104]\n\t" - "ldr x4, [%[a], 112]\n\t" - "ldr x6, [%[a], 120]\n\t" - "ldr x5, [%[b], 112]\n\t" - "ldr x7, [%[b], 120]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 112]\n\t" - "str x6, [%[r], 120]\n\t" - "ldr x4, [%[a], 128]\n\t" - "ldr x6, [%[a], 136]\n\t" - "ldr x5, [%[b], 128]\n\t" - "ldr x7, [%[b], 136]\n\t" + "stp x4, x6, [%[r], 112]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 128]\n\t" - "str x6, [%[r], 136]\n\t" - "ldr x4, [%[a], 144]\n\t" - "ldr x6, [%[a], 152]\n\t" - "ldr x5, [%[b], 144]\n\t" - "ldr x7, [%[b], 152]\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "ldp x4, x6, [%[a], 144]\n\t" + "ldp x5, x7, [%[b], 144]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 144]\n\t" - "str x6, [%[r], 152]\n\t" - "ldr x4, [%[a], 160]\n\t" - "ldr x6, [%[a], 168]\n\t" - "ldr x5, [%[b], 160]\n\t" - "ldr x7, [%[b], 168]\n\t" + "stp x4, x6, [%[r], 144]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 160]\n\t" - "str x6, [%[r], 168]\n\t" - "ldr x4, [%[a], 176]\n\t" - "ldr x6, [%[a], 184]\n\t" - "ldr x5, [%[b], 176]\n\t" - "ldr x7, [%[b], 184]\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "ldp x4, x6, [%[a], 176]\n\t" + "ldp x5, x7, [%[b], 176]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 176]\n\t" - "str x6, [%[r], 184]\n\t" - "ldr x4, [%[a], 192]\n\t" - "ldr x6, [%[a], 200]\n\t" - "ldr x5, [%[b], 192]\n\t" - "ldr x7, [%[b], 200]\n\t" + "stp x4, x6, [%[r], 176]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 192]\n\t" - "str x6, [%[r], 200]\n\t" - "ldr x4, [%[a], 208]\n\t" - "ldr x6, [%[a], 216]\n\t" - "ldr x5, [%[b], 208]\n\t" - "ldr x7, [%[b], 216]\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "ldp x4, x6, [%[a], 208]\n\t" + "ldp x5, x7, [%[b], 208]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 208]\n\t" - "str x6, [%[r], 216]\n\t" - "ldr x4, [%[a], 224]\n\t" - "ldr x6, [%[a], 232]\n\t" - "ldr x5, [%[b], 224]\n\t" - "ldr x7, [%[b], 232]\n\t" + "stp x4, x6, [%[r], 208]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 224]\n\t" - "str x6, [%[r], 232]\n\t" - "ldr x4, [%[a], 240]\n\t" - "ldr x6, [%[a], 248]\n\t" - "ldr x5, [%[b], 240]\n\t" - "ldr x7, [%[b], 248]\n\t" + "stp x4, x6, [%[r], 224]\n\t" + "ldp x4, x6, [%[a], 240]\n\t" + "ldp x5, x7, [%[b], 240]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 240]\n\t" - "str x6, [%[r], 248]\n\t" + "stp x4, x6, [%[r], 240]\n\t" "csetm %[c], cc\n\t" : [c] "+r" (c) : [r] "r" 
(r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -4841,6 +4769,115 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_2048_cond_add_16(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 128\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adds x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 8]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 24]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 40]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 56]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 72]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 88]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 104]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 120]\n\t" + "stp x4, x6, [%[r], 112]\n\t" + "cset %[c], cs\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -4875,7 +4912,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -4905,8 +4941,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 16; tmpb = tmpa + 32; - tmp = t; - r = tmp + 32; + r = t + 32; } #else r = a = ad; @@ -4915,7 +4950,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 32; #endif if (err == MP_OKAY) { @@ -4933,8 +4967,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_2048_sub_in_place_16(tmpa, tmpb); - sp_2048_mask_16(tmp, p, c); - sp_2048_add_16(tmpa, tmpa, tmp); + c += sp_2048_cond_add_16(tmpa, tmpa, p, c); + sp_2048_cond_add_16(tmpa, tmpa, p, c); sp_2048_from_mp(qi, 16, qim); sp_2048_mul_16(tmpa, tmpa, qi); @@ -4965,6 +4999,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -9097,126 +9132,90 @@ static sp_digit sp_3072_cond_sub_24(sp_digit* r, const sp_digit* a, const sp_dig #else __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "subs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" - "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" - "ldr x4, [%[a], 32]\n\t" - "ldr x6, [%[a], 40]\n\t" - "ldr x5, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 32]\n\t" - "str x6, [%[r], 40]\n\t" - "ldr x4, [%[a], 48]\n\t" - "ldr x6, [%[a], 56]\n\t" - "ldr x5, [%[b], 48]\n\t" - "ldr x7, [%[b], 56]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 48]\n\t" - "str x6, [%[r], 56]\n\t" - "ldr x4, [%[a], 64]\n\t" - "ldr x6, [%[a], 72]\n\t" - "ldr x5, [%[b], 64]\n\t" - "ldr x7, [%[b], 72]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 64]\n\t" - "str x6, [%[r], 72]\n\t" - "ldr x4, [%[a], 80]\n\t" - "ldr x6, [%[a], 88]\n\t" - "ldr x5, [%[b], 80]\n\t" - "ldr x7, [%[b], 88]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 80]\n\t" - "str x6, [%[r], 88]\n\t" - "ldr x4, [%[a], 96]\n\t" - "ldr x6, [%[a], 104]\n\t" - "ldr x5, [%[b], 96]\n\t" - "ldr x7, [%[b], 104]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 
96]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 96]\n\t" - "str x6, [%[r], 104]\n\t" - "ldr x4, [%[a], 112]\n\t" - "ldr x6, [%[a], 120]\n\t" - "ldr x5, [%[b], 112]\n\t" - "ldr x7, [%[b], 120]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 112]\n\t" - "str x6, [%[r], 120]\n\t" - "ldr x4, [%[a], 128]\n\t" - "ldr x6, [%[a], 136]\n\t" - "ldr x5, [%[b], 128]\n\t" - "ldr x7, [%[b], 136]\n\t" + "stp x4, x6, [%[r], 112]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 128]\n\t" - "str x6, [%[r], 136]\n\t" - "ldr x4, [%[a], 144]\n\t" - "ldr x6, [%[a], 152]\n\t" - "ldr x5, [%[b], 144]\n\t" - "ldr x7, [%[b], 152]\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "ldp x4, x6, [%[a], 144]\n\t" + "ldp x5, x7, [%[b], 144]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 144]\n\t" - "str x6, [%[r], 152]\n\t" - "ldr x4, [%[a], 160]\n\t" - "ldr x6, [%[a], 168]\n\t" - "ldr x5, [%[b], 160]\n\t" - "ldr x7, [%[b], 168]\n\t" + "stp x4, x6, [%[r], 144]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 160]\n\t" - "str x6, [%[r], 168]\n\t" - "ldr x4, [%[a], 176]\n\t" - "ldr x6, [%[a], 184]\n\t" - "ldr x5, [%[b], 176]\n\t" - "ldr x7, [%[b], 184]\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "ldp x4, x6, [%[a], 176]\n\t" + "ldp x5, x7, [%[b], 176]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 176]\n\t" - "str x6, [%[r], 184]\n\t" + "stp x4, x6, [%[r], 176]\n\t" "csetm %[c], cc\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -10466,246 +10465,174 @@ static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a, const sp_dig #else __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "subs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" - "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" - "ldr x4, [%[a], 32]\n\t" - "ldr x6, [%[a], 40]\n\t" - "ldr x5, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 32]\n\t" - "str x6, [%[r], 40]\n\t" - "ldr x4, [%[a], 48]\n\t" - "ldr x6, [%[a], 56]\n\t" - "ldr x5, [%[b], 48]\n\t" - "ldr x7, [%[b], 56]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 48]\n\t" - "str x6, [%[r], 56]\n\t" - "ldr x4, [%[a], 64]\n\t" - 
"ldr x6, [%[a], 72]\n\t" - "ldr x5, [%[b], 64]\n\t" - "ldr x7, [%[b], 72]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 64]\n\t" - "str x6, [%[r], 72]\n\t" - "ldr x4, [%[a], 80]\n\t" - "ldr x6, [%[a], 88]\n\t" - "ldr x5, [%[b], 80]\n\t" - "ldr x7, [%[b], 88]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 80]\n\t" - "str x6, [%[r], 88]\n\t" - "ldr x4, [%[a], 96]\n\t" - "ldr x6, [%[a], 104]\n\t" - "ldr x5, [%[b], 96]\n\t" - "ldr x7, [%[b], 104]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 96]\n\t" - "str x6, [%[r], 104]\n\t" - "ldr x4, [%[a], 112]\n\t" - "ldr x6, [%[a], 120]\n\t" - "ldr x5, [%[b], 112]\n\t" - "ldr x7, [%[b], 120]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 112]\n\t" - "str x6, [%[r], 120]\n\t" - "ldr x4, [%[a], 128]\n\t" - "ldr x6, [%[a], 136]\n\t" - "ldr x5, [%[b], 128]\n\t" - "ldr x7, [%[b], 136]\n\t" + "stp x4, x6, [%[r], 112]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 128]\n\t" - "str x6, [%[r], 136]\n\t" - "ldr x4, [%[a], 144]\n\t" - "ldr x6, [%[a], 152]\n\t" - "ldr x5, [%[b], 144]\n\t" - "ldr x7, [%[b], 152]\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "ldp x4, x6, [%[a], 144]\n\t" + "ldp x5, x7, [%[b], 144]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 144]\n\t" - "str x6, [%[r], 152]\n\t" - "ldr x4, [%[a], 160]\n\t" - "ldr x6, [%[a], 168]\n\t" - "ldr x5, [%[b], 160]\n\t" - "ldr x7, [%[b], 168]\n\t" + "stp x4, x6, [%[r], 144]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 160]\n\t" - "str x6, [%[r], 168]\n\t" - "ldr x4, [%[a], 176]\n\t" - "ldr x6, [%[a], 184]\n\t" - "ldr x5, [%[b], 176]\n\t" - "ldr x7, [%[b], 184]\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "ldp x4, x6, [%[a], 176]\n\t" + "ldp x5, x7, [%[b], 176]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 176]\n\t" - "str x6, [%[r], 184]\n\t" - "ldr x4, [%[a], 192]\n\t" - "ldr x6, [%[a], 200]\n\t" - "ldr x5, [%[b], 192]\n\t" - "ldr x7, [%[b], 200]\n\t" + "stp x4, x6, [%[r], 176]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 192]\n\t" - "str x6, [%[r], 200]\n\t" - "ldr x4, [%[a], 208]\n\t" - "ldr x6, [%[a], 216]\n\t" - "ldr x5, [%[b], 208]\n\t" - "ldr x7, [%[b], 216]\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "ldp x4, x6, [%[a], 208]\n\t" + "ldp x5, x7, [%[b], 208]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 208]\n\t" - "str x6, [%[r], 216]\n\t" - "ldr x4, [%[a], 224]\n\t" - "ldr x6, [%[a], 232]\n\t" - "ldr x5, 
[%[b], 224]\n\t" - "ldr x7, [%[b], 232]\n\t" + "stp x4, x6, [%[r], 208]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 224]\n\t" - "str x6, [%[r], 232]\n\t" - "ldr x4, [%[a], 240]\n\t" - "ldr x6, [%[a], 248]\n\t" - "ldr x5, [%[b], 240]\n\t" - "ldr x7, [%[b], 248]\n\t" + "stp x4, x6, [%[r], 224]\n\t" + "ldp x4, x6, [%[a], 240]\n\t" + "ldp x5, x7, [%[b], 240]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 240]\n\t" - "str x6, [%[r], 248]\n\t" - "ldr x4, [%[a], 256]\n\t" - "ldr x6, [%[a], 264]\n\t" - "ldr x5, [%[b], 256]\n\t" - "ldr x7, [%[b], 264]\n\t" + "stp x4, x6, [%[r], 240]\n\t" + "ldp x4, x6, [%[a], 256]\n\t" + "ldp x5, x7, [%[b], 256]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 256]\n\t" - "str x6, [%[r], 264]\n\t" - "ldr x4, [%[a], 272]\n\t" - "ldr x6, [%[a], 280]\n\t" - "ldr x5, [%[b], 272]\n\t" - "ldr x7, [%[b], 280]\n\t" + "stp x4, x6, [%[r], 256]\n\t" + "ldp x4, x6, [%[a], 272]\n\t" + "ldp x5, x7, [%[b], 272]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 272]\n\t" - "str x6, [%[r], 280]\n\t" - "ldr x4, [%[a], 288]\n\t" - "ldr x6, [%[a], 296]\n\t" - "ldr x5, [%[b], 288]\n\t" - "ldr x7, [%[b], 296]\n\t" + "stp x4, x6, [%[r], 272]\n\t" + "ldp x4, x6, [%[a], 288]\n\t" + "ldp x5, x7, [%[b], 288]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 288]\n\t" - "str x6, [%[r], 296]\n\t" - "ldr x4, [%[a], 304]\n\t" - "ldr x6, [%[a], 312]\n\t" - "ldr x5, [%[b], 304]\n\t" - "ldr x7, [%[b], 312]\n\t" + "stp x4, x6, [%[r], 288]\n\t" + "ldp x4, x6, [%[a], 304]\n\t" + "ldp x5, x7, [%[b], 304]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 304]\n\t" - "str x6, [%[r], 312]\n\t" - "ldr x4, [%[a], 320]\n\t" - "ldr x6, [%[a], 328]\n\t" - "ldr x5, [%[b], 320]\n\t" - "ldr x7, [%[b], 328]\n\t" + "stp x4, x6, [%[r], 304]\n\t" + "ldp x4, x6, [%[a], 320]\n\t" + "ldp x5, x7, [%[b], 320]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 320]\n\t" - "str x6, [%[r], 328]\n\t" - "ldr x4, [%[a], 336]\n\t" - "ldr x6, [%[a], 344]\n\t" - "ldr x5, [%[b], 336]\n\t" - "ldr x7, [%[b], 344]\n\t" + "stp x4, x6, [%[r], 320]\n\t" + "ldp x4, x6, [%[a], 336]\n\t" + "ldp x5, x7, [%[b], 336]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 336]\n\t" - "str x6, [%[r], 344]\n\t" - "ldr x4, [%[a], 352]\n\t" - "ldr x6, [%[a], 360]\n\t" - "ldr x5, [%[b], 352]\n\t" - "ldr x7, [%[b], 360]\n\t" + "stp x4, x6, [%[r], 336]\n\t" + "ldp x4, x6, [%[a], 352]\n\t" + "ldp x5, x7, [%[b], 352]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 352]\n\t" - "str x6, [%[r], 360]\n\t" - "ldr x4, [%[a], 368]\n\t" - "ldr x6, [%[a], 376]\n\t" - "ldr x5, [%[b], 368]\n\t" - "ldr x7, [%[b], 376]\n\t" + "stp x4, x6, [%[r], 352]\n\t" + "ldp x4, x6, [%[a], 368]\n\t" + "ldp x5, x7, [%[b], 368]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 368]\n\t" - "str x6, [%[r], 376]\n\t" + "stp x4, x6, [%[r], 368]\n\t" "csetm %[c], cc\n\t" : [c] "+r" (c) : [r] "r" 
(r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -12287,6 +12214,147 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +static sp_digit sp_3072_cond_add_24(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 192\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adds x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 8]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 24]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 40]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 56]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 72]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 88]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 104]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 120]\n\t" + "stp x4, x6, [%[r], 112]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 136]\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "ldp x4, x6, [%[a], 144]\n\t" + "ldp x5, x7, [%[b], 144]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 152]\n\t" + "stp x4, x6, [%[r], 144]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 168]\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "ldp x4, x6, [%[a], 176]\n\t" + "ldp x5, x7, [%[b], 176]\n\t" + "and x5, x5, 
%[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 184]\n\t" + "stp x4, x6, [%[r], 176]\n\t" + "cset %[c], cs\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -12321,7 +12389,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -12351,8 +12418,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 24; tmpb = tmpa + 48; - tmp = t; - r = tmp + 48; + r = t + 48; } #else r = a = ad; @@ -12361,7 +12427,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 48; #endif if (err == MP_OKAY) { @@ -12379,8 +12444,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_3072_sub_in_place_24(tmpa, tmpb); - sp_3072_mask_24(tmp, p, c); - sp_3072_add_24(tmpa, tmpa, tmp); + c += sp_3072_cond_add_24(tmpa, tmpa, p, c); + sp_3072_cond_add_24(tmpa, tmpa, p, c); sp_3072_from_mp(qi, 24, qim); sp_3072_mul_24(tmpa, tmpa, qi); @@ -12411,6 +12476,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -27491,326 +27557,230 @@ static sp_digit sp_4096_cond_sub_64(sp_digit* r, const sp_digit* a, const sp_dig #else __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "subs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" - "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" - "ldr x4, [%[a], 32]\n\t" - "ldr x6, [%[a], 40]\n\t" - "ldr x5, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 32]\n\t" - "str x6, [%[r], 40]\n\t" - "ldr x4, [%[a], 48]\n\t" - "ldr x6, [%[a], 56]\n\t" - "ldr x5, [%[b], 48]\n\t" - "ldr x7, [%[b], 56]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 48]\n\t" - "str x6, [%[r], 56]\n\t" - "ldr x4, [%[a], 64]\n\t" - "ldr x6, [%[a], 72]\n\t" - "ldr x5, [%[b], 64]\n\t" - "ldr x7, [%[b], 72]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 64]\n\t" - "str x6, [%[r], 72]\n\t" - "ldr x4, [%[a], 80]\n\t" - "ldr x6, [%[a], 88]\n\t" - "ldr x5, [%[b], 80]\n\t" - "ldr x7, 
[%[b], 88]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 80]\n\t" - "str x6, [%[r], 88]\n\t" - "ldr x4, [%[a], 96]\n\t" - "ldr x6, [%[a], 104]\n\t" - "ldr x5, [%[b], 96]\n\t" - "ldr x7, [%[b], 104]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 96]\n\t" - "str x6, [%[r], 104]\n\t" - "ldr x4, [%[a], 112]\n\t" - "ldr x6, [%[a], 120]\n\t" - "ldr x5, [%[b], 112]\n\t" - "ldr x7, [%[b], 120]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 112]\n\t" - "str x6, [%[r], 120]\n\t" - "ldr x4, [%[a], 128]\n\t" - "ldr x6, [%[a], 136]\n\t" - "ldr x5, [%[b], 128]\n\t" - "ldr x7, [%[b], 136]\n\t" + "stp x4, x6, [%[r], 112]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 128]\n\t" - "str x6, [%[r], 136]\n\t" - "ldr x4, [%[a], 144]\n\t" - "ldr x6, [%[a], 152]\n\t" - "ldr x5, [%[b], 144]\n\t" - "ldr x7, [%[b], 152]\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "ldp x4, x6, [%[a], 144]\n\t" + "ldp x5, x7, [%[b], 144]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 144]\n\t" - "str x6, [%[r], 152]\n\t" - "ldr x4, [%[a], 160]\n\t" - "ldr x6, [%[a], 168]\n\t" - "ldr x5, [%[b], 160]\n\t" - "ldr x7, [%[b], 168]\n\t" + "stp x4, x6, [%[r], 144]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 160]\n\t" - "str x6, [%[r], 168]\n\t" - "ldr x4, [%[a], 176]\n\t" - "ldr x6, [%[a], 184]\n\t" - "ldr x5, [%[b], 176]\n\t" - "ldr x7, [%[b], 184]\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "ldp x4, x6, [%[a], 176]\n\t" + "ldp x5, x7, [%[b], 176]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 176]\n\t" - "str x6, [%[r], 184]\n\t" - "ldr x4, [%[a], 192]\n\t" - "ldr x6, [%[a], 200]\n\t" - "ldr x5, [%[b], 192]\n\t" - "ldr x7, [%[b], 200]\n\t" + "stp x4, x6, [%[r], 176]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 192]\n\t" - "str x6, [%[r], 200]\n\t" - "ldr x4, [%[a], 208]\n\t" - "ldr x6, [%[a], 216]\n\t" - "ldr x5, [%[b], 208]\n\t" - "ldr x7, [%[b], 216]\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "ldp x4, x6, [%[a], 208]\n\t" + "ldp x5, x7, [%[b], 208]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 208]\n\t" - "str x6, [%[r], 216]\n\t" - "ldr x4, [%[a], 224]\n\t" - "ldr x6, [%[a], 232]\n\t" - "ldr x5, [%[b], 224]\n\t" - "ldr x7, [%[b], 232]\n\t" + "stp x4, x6, [%[r], 208]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 224]\n\t" - "str x6, [%[r], 232]\n\t" - "ldr x4, [%[a], 240]\n\t" - "ldr x6, [%[a], 248]\n\t" - "ldr x5, [%[b], 240]\n\t" - "ldr x7, [%[b], 248]\n\t" + "stp x4, 
x6, [%[r], 224]\n\t" + "ldp x4, x6, [%[a], 240]\n\t" + "ldp x5, x7, [%[b], 240]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 240]\n\t" - "str x6, [%[r], 248]\n\t" - "ldr x4, [%[a], 256]\n\t" - "ldr x6, [%[a], 264]\n\t" - "ldr x5, [%[b], 256]\n\t" - "ldr x7, [%[b], 264]\n\t" + "stp x4, x6, [%[r], 240]\n\t" + "ldp x4, x6, [%[a], 256]\n\t" + "ldp x5, x7, [%[b], 256]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 256]\n\t" - "str x6, [%[r], 264]\n\t" - "ldr x4, [%[a], 272]\n\t" - "ldr x6, [%[a], 280]\n\t" - "ldr x5, [%[b], 272]\n\t" - "ldr x7, [%[b], 280]\n\t" + "stp x4, x6, [%[r], 256]\n\t" + "ldp x4, x6, [%[a], 272]\n\t" + "ldp x5, x7, [%[b], 272]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 272]\n\t" - "str x6, [%[r], 280]\n\t" - "ldr x4, [%[a], 288]\n\t" - "ldr x6, [%[a], 296]\n\t" - "ldr x5, [%[b], 288]\n\t" - "ldr x7, [%[b], 296]\n\t" + "stp x4, x6, [%[r], 272]\n\t" + "ldp x4, x6, [%[a], 288]\n\t" + "ldp x5, x7, [%[b], 288]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 288]\n\t" - "str x6, [%[r], 296]\n\t" - "ldr x4, [%[a], 304]\n\t" - "ldr x6, [%[a], 312]\n\t" - "ldr x5, [%[b], 304]\n\t" - "ldr x7, [%[b], 312]\n\t" + "stp x4, x6, [%[r], 288]\n\t" + "ldp x4, x6, [%[a], 304]\n\t" + "ldp x5, x7, [%[b], 304]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 304]\n\t" - "str x6, [%[r], 312]\n\t" - "ldr x4, [%[a], 320]\n\t" - "ldr x6, [%[a], 328]\n\t" - "ldr x5, [%[b], 320]\n\t" - "ldr x7, [%[b], 328]\n\t" + "stp x4, x6, [%[r], 304]\n\t" + "ldp x4, x6, [%[a], 320]\n\t" + "ldp x5, x7, [%[b], 320]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 320]\n\t" - "str x6, [%[r], 328]\n\t" - "ldr x4, [%[a], 336]\n\t" - "ldr x6, [%[a], 344]\n\t" - "ldr x5, [%[b], 336]\n\t" - "ldr x7, [%[b], 344]\n\t" + "stp x4, x6, [%[r], 320]\n\t" + "ldp x4, x6, [%[a], 336]\n\t" + "ldp x5, x7, [%[b], 336]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 336]\n\t" - "str x6, [%[r], 344]\n\t" - "ldr x4, [%[a], 352]\n\t" - "ldr x6, [%[a], 360]\n\t" - "ldr x5, [%[b], 352]\n\t" - "ldr x7, [%[b], 360]\n\t" + "stp x4, x6, [%[r], 336]\n\t" + "ldp x4, x6, [%[a], 352]\n\t" + "ldp x5, x7, [%[b], 352]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 352]\n\t" - "str x6, [%[r], 360]\n\t" - "ldr x4, [%[a], 368]\n\t" - "ldr x6, [%[a], 376]\n\t" - "ldr x5, [%[b], 368]\n\t" - "ldr x7, [%[b], 376]\n\t" + "stp x4, x6, [%[r], 352]\n\t" + "ldp x4, x6, [%[a], 368]\n\t" + "ldp x5, x7, [%[b], 368]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 368]\n\t" - "str x6, [%[r], 376]\n\t" - "ldr x4, [%[a], 384]\n\t" - "ldr x6, [%[a], 392]\n\t" - "ldr x5, [%[b], 384]\n\t" - "ldr x7, [%[b], 392]\n\t" + "stp x4, x6, [%[r], 368]\n\t" + "ldp x4, x6, [%[a], 384]\n\t" + "ldp x5, x7, [%[b], 384]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 384]\n\t" - "str x6, [%[r], 392]\n\t" - "ldr x4, [%[a], 400]\n\t" - "ldr x6, [%[a], 408]\n\t" - "ldr x5, [%[b], 400]\n\t" - "ldr x7, [%[b], 408]\n\t" + "stp x4, x6, [%[r], 
384]\n\t" + "ldp x4, x6, [%[a], 400]\n\t" + "ldp x5, x7, [%[b], 400]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 400]\n\t" - "str x6, [%[r], 408]\n\t" - "ldr x4, [%[a], 416]\n\t" - "ldr x6, [%[a], 424]\n\t" - "ldr x5, [%[b], 416]\n\t" - "ldr x7, [%[b], 424]\n\t" + "stp x4, x6, [%[r], 400]\n\t" + "ldp x4, x6, [%[a], 416]\n\t" + "ldp x5, x7, [%[b], 416]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 416]\n\t" - "str x6, [%[r], 424]\n\t" - "ldr x4, [%[a], 432]\n\t" - "ldr x6, [%[a], 440]\n\t" - "ldr x5, [%[b], 432]\n\t" - "ldr x7, [%[b], 440]\n\t" + "stp x4, x6, [%[r], 416]\n\t" + "ldp x4, x6, [%[a], 432]\n\t" + "ldp x5, x7, [%[b], 432]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 432]\n\t" - "str x6, [%[r], 440]\n\t" - "ldr x4, [%[a], 448]\n\t" - "ldr x6, [%[a], 456]\n\t" - "ldr x5, [%[b], 448]\n\t" - "ldr x7, [%[b], 456]\n\t" + "stp x4, x6, [%[r], 432]\n\t" + "ldp x4, x6, [%[a], 448]\n\t" + "ldp x5, x7, [%[b], 448]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 448]\n\t" - "str x6, [%[r], 456]\n\t" - "ldr x4, [%[a], 464]\n\t" - "ldr x6, [%[a], 472]\n\t" - "ldr x5, [%[b], 464]\n\t" - "ldr x7, [%[b], 472]\n\t" + "stp x4, x6, [%[r], 448]\n\t" + "ldp x4, x6, [%[a], 464]\n\t" + "ldp x5, x7, [%[b], 464]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 464]\n\t" - "str x6, [%[r], 472]\n\t" - "ldr x4, [%[a], 480]\n\t" - "ldr x6, [%[a], 488]\n\t" - "ldr x5, [%[b], 480]\n\t" - "ldr x7, [%[b], 488]\n\t" + "stp x4, x6, [%[r], 464]\n\t" + "ldp x4, x6, [%[a], 480]\n\t" + "ldp x5, x7, [%[b], 480]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 480]\n\t" - "str x6, [%[r], 488]\n\t" - "ldr x4, [%[a], 496]\n\t" - "ldr x6, [%[a], 504]\n\t" - "ldr x5, [%[b], 496]\n\t" - "ldr x7, [%[b], 504]\n\t" + "stp x4, x6, [%[r], 480]\n\t" + "ldp x4, x6, [%[a], 496]\n\t" + "ldp x5, x7, [%[b], 496]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 496]\n\t" - "str x6, [%[r], 504]\n\t" + "stp x4, x6, [%[r], 496]\n\t" "csetm %[c], cc\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -29680,6 +29650,179 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +static sp_digit sp_4096_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + +#ifdef WOLFSSL_SP_SMALL + __asm__ __volatile__ ( + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 256\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adds x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 8]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 24]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 40]\n\t" + "stp x4, x6, [%[r], 32]\n\t" + "ldp x4, x6, [%[a], 48]\n\t" + "ldp x5, x7, [%[b], 48]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 56]\n\t" + "stp x4, x6, [%[r], 48]\n\t" + "ldp x4, x6, [%[a], 64]\n\t" + "ldp x5, x7, [%[b], 64]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 72]\n\t" + "stp x4, x6, [%[r], 64]\n\t" + "ldp x4, x6, [%[a], 80]\n\t" + "ldp x5, x7, [%[b], 80]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 88]\n\t" + "stp x4, x6, [%[r], 80]\n\t" + "ldp x4, x6, [%[a], 96]\n\t" + "ldp x5, x7, [%[b], 96]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 104]\n\t" + "stp x4, x6, [%[r], 96]\n\t" + "ldp x4, x6, [%[a], 112]\n\t" + "ldp x5, x7, [%[b], 112]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 120]\n\t" + "stp x4, x6, [%[r], 112]\n\t" + "ldp x4, x6, [%[a], 128]\n\t" + "ldp x5, x7, [%[b], 128]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 136]\n\t" + "stp x4, x6, [%[r], 128]\n\t" + "ldp x4, x6, [%[a], 144]\n\t" + "ldp x5, x7, [%[b], 144]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 152]\n\t" + "stp x4, x6, [%[r], 144]\n\t" + "ldp x4, x6, [%[a], 160]\n\t" + "ldp x5, x7, [%[b], 160]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 168]\n\t" + "stp x4, x6, [%[r], 160]\n\t" + "ldp x4, x6, [%[a], 176]\n\t" + "ldp x5, x7, [%[b], 176]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 184]\n\t" + "stp x4, x6, [%[r], 176]\n\t" + "ldp x4, x6, [%[a], 192]\n\t" + "ldp x5, x7, [%[b], 192]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 200]\n\t" + "stp x4, x6, [%[r], 192]\n\t" + "ldp x4, x6, [%[a], 208]\n\t" + "ldp x5, x7, [%[b], 208]\n\t" + "and x5, x5, %[m]\n\t" + "and 
x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 216]\n\t" + "stp x4, x6, [%[r], 208]\n\t" + "ldp x4, x6, [%[a], 224]\n\t" + "ldp x5, x7, [%[b], 224]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 232]\n\t" + "stp x4, x6, [%[r], 224]\n\t" + "ldp x4, x6, [%[a], 240]\n\t" + "ldp x5, x7, [%[b], 240]\n\t" + "and x5, x5, %[m]\n\t" + "and x7, x7, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "adcs x6, x6, x7\n\t" + "str x6, [%[r], 248]\n\t" + "stp x4, x6, [%[r], 240]\n\t" + "cset %[c], cs\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#endif /* WOLFSSL_SP_SMALL */ + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -29714,7 +29857,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -29744,8 +29886,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 32; tmpb = tmpa + 64; - tmp = t; - r = tmp + 64; + r = t + 64; } #else r = a = ad; @@ -29754,7 +29895,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 64; #endif if (err == MP_OKAY) { @@ -29772,8 +29912,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_4096_sub_in_place_32(tmpa, tmpb); - sp_4096_mask_32(tmp, p, c); - sp_4096_add_32(tmpa, tmpa, tmp); + c += sp_4096_cond_add_32(tmpa, tmpa, p, c); + sp_4096_cond_add_32(tmpa, tmpa, p, c); sp_4096_from_mp(qi, 32, qim); sp_4096_mul_32(tmpa, tmpa, qi); @@ -29804,6 +29944,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -31467,26 +31608,20 @@ static sp_digit sp_256_cond_sub_4(sp_digit* r, const sp_digit* a, const sp_digit __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "subs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" - "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" + "stp x4, x6, [%[r], 16]\n\t" "csetm %[c], cc\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -49133,36 +49268,27 @@ static sp_digit sp_384_cond_sub_6(sp_digit* r, const sp_digit* a, const sp_digit #else __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "subs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" - "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp 
x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" - "str x6, [%[r], 24]\n\t" - "ldr x4, [%[a], 32]\n\t" - "ldr x6, [%[a], 40]\n\t" - "ldr x5, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "sbcs x4, x4, x5\n\t" "sbcs x6, x6, x7\n\t" - "str x4, [%[r], 32]\n\t" - "str x6, [%[r], 40]\n\t" + "stp x4, x6, [%[r], 32]\n\t" "csetm %[c], cc\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) @@ -49948,42 +50074,57 @@ static sp_digit sp_384_cond_add_6(sp_digit* r, const sp_digit* a, const sp_digit { sp_digit c = 0; +#ifdef WOLFSSL_SP_SMALL __asm__ __volatile__ ( - "ldr x4, [%[a], 0]\n\t" - "ldr x6, [%[a], 8]\n\t" - "ldr x5, [%[b], 0]\n\t" - "ldr x7, [%[b], 8]\n\t" + "mov x8, #0\n\t" + "1:\n\t" + "adds %[c], %[c], #-1\n\t" + "ldr x4, [%[a], x8]\n\t" + "ldr x5, [%[b], x8]\n\t" + "and x5, x5, %[m]\n\t" + "adcs x4, x4, x5\n\t" + "cset %[c], cs\n\t" + "str x4, [%[r], x8]\n\t" + "add x8, x8, #8\n\t" + "cmp x8, 48\n\t" + "b.lt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "x4", "x6", "x5", "x7", "x8" + ); +#else + __asm__ __volatile__ ( + + "ldp x4, x6, [%[a], 0]\n\t" + "ldp x5, x7, [%[b], 0]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "adds x4, x4, x5\n\t" "adcs x6, x6, x7\n\t" - "str x4, [%[r], 0]\n\t" "str x6, [%[r], 8]\n\t" - "ldr x4, [%[a], 16]\n\t" - "ldr x6, [%[a], 24]\n\t" - "ldr x5, [%[b], 16]\n\t" - "ldr x7, [%[b], 24]\n\t" + "stp x4, x6, [%[r], 0]\n\t" + "ldp x4, x6, [%[a], 16]\n\t" + "ldp x5, x7, [%[b], 16]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "adcs x4, x4, x5\n\t" "adcs x6, x6, x7\n\t" - "str x4, [%[r], 16]\n\t" "str x6, [%[r], 24]\n\t" - "ldr x4, [%[a], 32]\n\t" - "ldr x6, [%[a], 40]\n\t" - "ldr x5, [%[b], 32]\n\t" - "ldr x7, [%[b], 40]\n\t" + "stp x4, x6, [%[r], 16]\n\t" + "ldp x4, x6, [%[a], 32]\n\t" + "ldp x5, x7, [%[b], 32]\n\t" "and x5, x5, %[m]\n\t" "and x7, x7, %[m]\n\t" "adcs x4, x4, x5\n\t" "adcs x6, x6, x7\n\t" - "str x4, [%[r], 32]\n\t" "str x6, [%[r], 40]\n\t" + "stp x4, x6, [%[r], 32]\n\t" "cset %[c], cs\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "x4", "x6", "x5", "x7" + : "memory", "x4", "x6", "x5", "x7", "x8" ); +#endif /* WOLFSSL_SP_SMALL */ return c; } diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 5c6b01ab0..38fd06ecd 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -4358,6 +4358,46 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. 
+ */ +SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #128\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, #1\n\t" + "add r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "adc r5, r6\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -4392,7 +4432,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -4422,8 +4461,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 32; tmpb = tmpa + 64; - tmp = t; - r = tmp + 64; + r = t + 64; } #else r = a = ad; @@ -4432,7 +4470,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 64; #endif if (err == MP_OKAY) { @@ -4450,8 +4487,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_2048_sub_in_place_32(tmpa, tmpb); - sp_2048_mask_32(tmp, p, c); - sp_2048_add_32(tmpa, tmpa, tmp); + c += sp_2048_cond_add_32(tmpa, tmpa, p, c); + sp_2048_cond_add_32(tmpa, tmpa, p, c); sp_2048_from_mp(qi, 32, qim); sp_2048_mul_32(tmpa, tmpa, qi); @@ -4482,6 +4519,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -10061,6 +10099,46 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #192\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, #1\n\t" + "add r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "adc r5, r6\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
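For readers not following the Thumb assembly above, a minimal portable sketch of what each sp_<size>_cond_add_<n> helper computes is given below. The digit type and the name cond_add are illustrative only and not part of this patch: the helper adds (b & m) to a digit by digit and returns the final carry, so m == 0 leaves the value unchanged while taking the same time, and m == all ones performs the full addition.

    #include <stdint.h>

    typedef uint32_t digit;                  /* stand-in for sp_digit */

    /* r = a + (b & m); m is 0 or all ones; returns the carry out. */
    static digit cond_add(digit* r, const digit* a, const digit* b,
                          digit m, int n)
    {
        digit c = 0;
        int i;

        for (i = 0; i < n; i++) {
            /* Widen so the carry out of the 32-bit digit is visible. */
            uint64_t t = (uint64_t)a[i] + (b[i] & m) + c;
            r[i] = (digit)t;
            c = (digit)(t >> 32);            /* 0 or 1 into next digit */
        }
        return c;
    }

The AArch64 versions earlier in the patch extract the same carry with cset %[c], cs, and the matching conditional subtracts turn a borrow into an all-ones mask with csetm %[c], cc.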
@@ -10095,7 +10173,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -10125,8 +10202,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 48; tmpb = tmpa + 96; - tmp = t; - r = tmp + 96; + r = t + 96; } #else r = a = ad; @@ -10135,7 +10211,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 96; #endif if (err == MP_OKAY) { @@ -10153,8 +10228,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_3072_sub_in_place_48(tmpa, tmpb); - sp_3072_mask_48(tmp, p, c); - sp_3072_add_48(tmpa, tmpa, tmp); + c += sp_3072_cond_add_48(tmpa, tmpa, p, c); + sp_3072_cond_add_48(tmpa, tmpa, p, c); sp_3072_from_mp(qi, 48, qim); sp_3072_mul_48(tmpa, tmpa, qi); @@ -10185,6 +10260,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -14423,6 +14499,47 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #1\n\t" + "lsl r5, r5, #8\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" + "1:\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, #1\n\t" + "add r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "adc r5, r6\n\t" + "mov %[c], #0\n\t" + "adc %[c], %[c]\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r7", "r8" + ); + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
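The private-key hunks above, and the 4096-bit one that follows, all replace the old mask-and-add with two conditional adds because tmpa - tmpb can end up below -p, so a single addition of p may not bring the value back into range. A hedged sketch of the control flow, using the illustrative cond_add from the earlier sketch and a hypothetical sub_in_place that returns a 0 / all-ones borrow mask like the real sp_<size>_sub_in_place_<n>:

    #include <stdint.h>

    typedef uint32_t digit;

    /* Hypothetical helpers mirroring the sp_* primitives; declarations
     * only, this is a sketch rather than the shipped implementation. */
    digit sub_in_place(digit* a, const digit* b, int n);   /* 0 or ~0   */
    digit cond_add(digit* r, const digit* a, const digit* b,
                   digit m, int n);                        /* carry out */

    void reduce_mod_p(digit* tmpa, const digit* tmpb, const digit* p, int n)
    {
        digit c = sub_in_place(tmpa, tmpb, n);  /* ~0 when tmpa went negative */

        /* First conditional add runs only when c is all ones.  Its carry
         * out is 1 exactly when tmpa became non-negative again, and adding
         * that 1 to the all-ones mask turns it into 0. */
        c += cond_add(tmpa, tmpa, p, c, n);

        /* Second conditional add is therefore still enabled only when the
         * first addition of p was not enough, i.e. tmpa started below -p. */
        cond_add(tmpa, tmpa, p, c, n);
    }

The return value of the second call is deliberately discarded because c is not needed afterwards; the x86_64 hunks at the end of the patch make the same adjustment to the double add that already existed there.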
@@ -14457,7 +14574,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -14487,8 +14603,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 64; tmpb = tmpa + 128; - tmp = t; - r = tmp + 128; + r = t + 128; } #else r = a = ad; @@ -14497,7 +14612,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 128; #endif if (err == MP_OKAY) { @@ -14515,8 +14629,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_4096_sub_in_place_64(tmpa, tmpb); - sp_4096_mask_64(tmp, p, c); - sp_4096_add_64(tmpa, tmpa, tmp); + c += sp_4096_cond_add_64(tmpa, tmpa, p, c); + sp_4096_cond_add_64(tmpa, tmpa, p, c); sp_4096_from_mp(qi, 64, qim); sp_4096_mul_64(tmpa, tmpa, qi); @@ -14547,6 +14661,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -23083,28 +23198,25 @@ SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, c sp_digit c = 0; __asm__ __volatile__ ( - "mov r5, #0\n\t" - "mov r7, %[a]\n\t" - "add r7, #48\n\t" - "sub r5, #1\n\t" - "mov r8, r7\n\t" + "mov r5, #48\n\t" + "mov r8, r5\n\t" + "mov r7, #0\n\t" "1:\n\t" - "ldr r6, [%[a]]\n\t" - "ldr r7, [%[b]]\n\t" - "and r7, %[m]\n\t" - "add %[c], r5\n\t" - "adc r6, r7\n\t" - "str r6, [%[r]]\n\t" + "ldr r6, [%[b], r7]\n\t" + "and r6, %[m]\n\t" + "mov r5, #0\n\t" + "sub r5, #1\n\t" + "add r5, %[c]\n\t" + "ldr r5, [%[a], r7]\n\t" + "adc r5, r6\n\t" "mov %[c], #0\n\t" "adc %[c], %[c]\n\t" - "add %[a], $4\n\t" - "add %[b], $4\n\t" - "add %[r], $4\n\t" - "mov r7, r8\n\t" - "cmp %[a], r7\n\t" + "str r5, [%[r], r7]\n\t" + "add r7, #4\n\t" + "cmp r7, r8\n\t" "blt 1b\n\t" - : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [c] "+r" (c) - : [m] "r" (m) + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) : "memory", "r5", "r6", "r7", "r8" ); diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index a308dcf0c..b3d5ade05 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -3128,44 +3128,6 @@ static int sp_2048_mod_exp_90(sp_digit* r, const sp_digit* a, const sp_digit* e, #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ -#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_2048_mask_45(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<45; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 40; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } - r[40] = a[40] & m; - r[41] = a[41] & m; - r[42] = a[42] & m; - r[43] = a[43] & m; - r[44] = a[44] & m; -#endif -} - -#endif #ifdef WOLFSSL_HAVE_SP_RSA /* RSA public key operation. 
* @@ -3397,6 +3359,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. @@ -3526,7 +3490,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -3562,8 +3525,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 45; tmpb = tmpa + 90; - tmp = t; - r = tmp + 90; + r = t + 90; sp_2048_from_bin(a, 90, in, inLen); sp_2048_from_mp(p, 45, pm); @@ -3577,8 +3539,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, } if (err == MP_OKAY) { (void)sp_2048_sub_45(tmpa, tmpa, tmpb); - sp_2048_mask_45(tmp, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); - (void)sp_2048_add_45(tmpa, tmpa, tmp); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); sp_2048_from_mp(qi, 45, qim); sp_2048_mul_45(tmpa, tmpa, qi); @@ -3603,7 +3565,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, #else sp_digit a[90 * 2]; sp_digit p[45], q[45], dp[45], dq[45], qi[45]; - sp_digit tmp[90], tmpa[90], tmpb[90]; + sp_digit tmpa[90], tmpb[90]; sp_digit* r = a; int err = MP_OKAY; @@ -3638,8 +3600,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { (void)sp_2048_sub_45(tmpa, tmpa, tmpb); - sp_2048_mask_45(tmp, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); - (void)sp_2048_add_45(tmpa, tmpa, tmp); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); + sp_2048_cond_add_45(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[44] >> 31)); sp_2048_mul_45(tmpa, tmpa, qi); err = sp_2048_mod_45(tmpa, tmpa, p); } @@ -7003,42 +6965,6 @@ static int sp_3072_mod_exp_134(sp_digit* r, const sp_digit* a, const sp_digit* e #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ -#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_3072_mask_67(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<67; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 64; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } - r[64] = a[64] & m; - r[65] = a[65] & m; - r[66] = a[66] & m; -#endif -} - -#endif #ifdef WOLFSSL_HAVE_SP_RSA /* RSA public key operation. * @@ -7270,6 +7196,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
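The generic C implementations (sp_c32.c here, sp_c64.c later in the patch) take a different route to the same result: the subtraction simply leaves the most significant digit negative when tmpa drops below zero, so each conditional add rebuilds its mask from that digit's sign bit instead of a returned borrow. Because the first call updates tmpa in place, the mask passed to the second call is recomputed and is zero once tmpa is back in range. A short sketch of the mask derivation for the 32-bit build; the function name is illustrative:

    #include <stdint.h>

    /* 0 - (top >> 31) is 0 when the top digit is non-negative and all
     * ones when it is negative, which is exactly the mask shape the
     * conditional add expects. */
    static uint32_t top_word_mask(uint32_t top_digit)
    {
        return (uint32_t)0 - (top_digit >> 31);
    }

The 64-bit generic build uses the same expression with a shift of 63, as in the sp_c64.c hunks below.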
@@ -7399,7 +7327,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -7435,8 +7362,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 67; tmpb = tmpa + 134; - tmp = t; - r = tmp + 134; + r = t + 134; sp_3072_from_bin(a, 134, in, inLen); sp_3072_from_mp(p, 67, pm); @@ -7450,8 +7376,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, } if (err == MP_OKAY) { (void)sp_3072_sub_67(tmpa, tmpa, tmpb); - sp_3072_mask_67(tmp, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); - (void)sp_3072_add_67(tmpa, tmpa, tmp); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); sp_3072_from_mp(qi, 67, qim); sp_3072_mul_67(tmpa, tmpa, qi); @@ -7476,7 +7402,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, #else sp_digit a[134 * 2]; sp_digit p[67], q[67], dp[67], dq[67], qi[67]; - sp_digit tmp[134], tmpa[134], tmpb[134]; + sp_digit tmpa[134], tmpb[134]; sp_digit* r = a; int err = MP_OKAY; @@ -7511,8 +7437,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { (void)sp_3072_sub_67(tmpa, tmpa, tmpb); - sp_3072_mask_67(tmp, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); - (void)sp_3072_add_67(tmpa, tmpa, tmp); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); + sp_3072_cond_add_67(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[66] >> 31)); sp_3072_mul_67(tmpa, tmpa, qi); err = sp_3072_mod_67(tmpa, tmpa, p); } @@ -11040,41 +10966,6 @@ static int sp_4096_mod_exp_196(sp_digit* r, const sp_digit* a, const sp_digit* e #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ -#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_4096_mask_98(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<98; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 96; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } - r[96] = a[96] & m; - r[97] = a[97] & m; -#endif -} - -#endif #ifdef WOLFSSL_HAVE_SP_RSA /* RSA public key operation. * @@ -11306,6 +11197,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -11435,7 +11328,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -11471,8 +11363,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 98; tmpb = tmpa + 196; - tmp = t; - r = tmp + 196; + r = t + 196; sp_4096_from_bin(a, 196, in, inLen); sp_4096_from_mp(p, 98, pm); @@ -11486,8 +11377,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, } if (err == MP_OKAY) { (void)sp_4096_sub_98(tmpa, tmpa, tmpb); - sp_4096_mask_98(tmp, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); - (void)sp_4096_add_98(tmpa, tmpa, tmp); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); sp_4096_from_mp(qi, 98, qim); sp_4096_mul_98(tmpa, tmpa, qi); @@ -11512,7 +11403,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, #else sp_digit a[196 * 2]; sp_digit p[98], q[98], dp[98], dq[98], qi[98]; - sp_digit tmp[196], tmpa[196], tmpb[196]; + sp_digit tmpa[196], tmpb[196]; sp_digit* r = a; int err = MP_OKAY; @@ -11547,8 +11438,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { (void)sp_4096_sub_98(tmpa, tmpa, tmpb); - sp_4096_mask_98(tmp, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); - (void)sp_4096_add_98(tmpa, tmpa, tmp); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); + sp_4096_cond_add_98(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[97] >> 31)); sp_4096_mul_98(tmpa, tmpa, qi); err = sp_4096_mod_98(tmpa, tmpa, p); } diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 814d06d46..36fcbf4ba 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -2768,41 +2768,6 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ -#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_2048_mask_18(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<18; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 16; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } - r[16] = a[16] & m; - r[17] = a[17] & m; -#endif -} - -#endif #ifdef WOLFSSL_HAVE_SP_RSA /* RSA public key operation. * @@ -3034,6 +2999,8 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
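Dropping the tmp pointer throughout these private-key functions falls out of the same change: the masked copy of p that used to be staged in tmp is never materialised, since the conditional add reads p directly. Only the aliasing of the scratch buffer changes, as in this sketch of the 2048-bit sp_c64.c case from the hunk that follows (18 digits per prime):

    /* After the change; r keeps the offset it already had. */
    tmpa = qi + 18;
    tmpb = tmpa + 36;
    r    = t + 36;     /* previously written as r = tmp + 36 with tmp = t */

In the fixed-buffer branches the corresponding tmp stack arrays disappear as well, a small memory saving on top of the correctness fix.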
@@ -3163,7 +3130,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -3199,8 +3165,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 18; tmpb = tmpa + 36; - tmp = t; - r = tmp + 36; + r = t + 36; sp_2048_from_bin(a, 36, in, inLen); sp_2048_from_mp(p, 18, pm); @@ -3214,8 +3179,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, } if (err == MP_OKAY) { (void)sp_2048_sub_18(tmpa, tmpa, tmpb); - sp_2048_mask_18(tmp, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); - (void)sp_2048_add_18(tmpa, tmpa, tmp); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); sp_2048_from_mp(qi, 18, qim); sp_2048_mul_18(tmpa, tmpa, qi); @@ -3240,7 +3205,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, #else sp_digit a[36 * 2]; sp_digit p[18], q[18], dp[18], dq[18], qi[18]; - sp_digit tmp[36], tmpa[36], tmpb[36]; + sp_digit tmpa[36], tmpb[36]; sp_digit* r = a; int err = MP_OKAY; @@ -3275,8 +3240,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { (void)sp_2048_sub_18(tmpa, tmpa, tmpb); - sp_2048_mask_18(tmp, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); - (void)sp_2048_add_18(tmpa, tmpa, tmp); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); + sp_2048_cond_add_18(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[17] >> 63)); sp_2048_mul_18(tmpa, tmpa, qi); err = sp_2048_mod_18(tmpa, tmpa, p); } @@ -6914,42 +6879,6 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ -#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_3072_mask_27(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<27; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 24; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } - r[24] = a[24] & m; - r[25] = a[25] & m; - r[26] = a[26] & m; -#endif -} - -#endif #ifdef WOLFSSL_HAVE_SP_RSA /* RSA public key operation. * @@ -7181,6 +7110,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -7310,7 +7241,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -7346,8 +7276,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 27; tmpb = tmpa + 54; - tmp = t; - r = tmp + 54; + r = t + 54; sp_3072_from_bin(a, 54, in, inLen); sp_3072_from_mp(p, 27, pm); @@ -7361,8 +7290,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, } if (err == MP_OKAY) { (void)sp_3072_sub_27(tmpa, tmpa, tmpb); - sp_3072_mask_27(tmp, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); - (void)sp_3072_add_27(tmpa, tmpa, tmp); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); sp_3072_from_mp(qi, 27, qim); sp_3072_mul_27(tmpa, tmpa, qi); @@ -7387,7 +7316,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, #else sp_digit a[54 * 2]; sp_digit p[27], q[27], dp[27], dq[27], qi[27]; - sp_digit tmp[54], tmpa[54], tmpb[54]; + sp_digit tmpa[54], tmpb[54]; sp_digit* r = a; int err = MP_OKAY; @@ -7422,8 +7351,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { (void)sp_3072_sub_27(tmpa, tmpa, tmpb); - sp_3072_mask_27(tmp, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); - (void)sp_3072_add_27(tmpa, tmpa, tmp); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); + sp_3072_cond_add_27(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[26] >> 63)); sp_3072_mul_27(tmpa, tmpa, qi); err = sp_3072_mod_27(tmpa, tmpa, p); } @@ -11306,46 +11235,6 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || */ /* WOLFSSL_HAVE_SP_DH */ -#if defined(WOLFSSL_HAVE_SP_RSA) && !defined(SP_RSA_PRIVATE_EXP_D) && \ - !defined(RSA_LOW_MEM) && !defined(WOLFSSL_RSA_PUBLIC_ONLY) -/* AND m into each word of a and store in r. - * - * r A single precision integer. - * a A single precision integer. - * m Mask to AND against each digit. - */ -static void sp_4096_mask_39(sp_digit* r, const sp_digit* a, sp_digit m) -{ -#ifdef WOLFSSL_SP_SMALL - int i; - - for (i=0; i<39; i++) { - r[i] = a[i] & m; - } -#else - int i; - - for (i = 0; i < 32; i += 8) { - r[i+0] = a[i+0] & m; - r[i+1] = a[i+1] & m; - r[i+2] = a[i+2] & m; - r[i+3] = a[i+3] & m; - r[i+4] = a[i+4] & m; - r[i+5] = a[i+5] & m; - r[i+6] = a[i+6] & m; - r[i+7] = a[i+7] & m; - } - r[32] = a[32] & m; - r[33] = a[33] & m; - r[34] = a[34] & m; - r[35] = a[35] & m; - r[36] = a[36] & m; - r[37] = a[37] & m; - r[38] = a[38] & m; -#endif -} - -#endif #ifdef WOLFSSL_HAVE_SP_RSA /* RSA public key operation. * @@ -11577,6 +11466,8 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } #ifndef WOLFSSL_RSA_PUBLIC_ONLY +#if !defined(SP_RSA_PRIVATE_EXP_D) && !defined(RSA_LOW_MEM) +#endif /* !SP_RSA_PRIVATE_EXP_D && !RSA_LOW_MEM */ /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -11706,7 +11597,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -11742,8 +11632,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 39; tmpb = tmpa + 78; - tmp = t; - r = tmp + 78; + r = t + 78; sp_4096_from_bin(a, 78, in, inLen); sp_4096_from_mp(p, 39, pm); @@ -11757,8 +11646,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, } if (err == MP_OKAY) { (void)sp_4096_sub_39(tmpa, tmpa, tmpb); - sp_4096_mask_39(tmp, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); - (void)sp_4096_add_39(tmpa, tmpa, tmp); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); sp_4096_from_mp(qi, 39, qim); sp_4096_mul_39(tmpa, tmpa, qi); @@ -11783,7 +11672,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, #else sp_digit a[78 * 2]; sp_digit p[39], q[39], dp[39], dq[39], qi[39]; - sp_digit tmp[78], tmpa[78], tmpb[78]; + sp_digit tmpa[78], tmpb[78]; sp_digit* r = a; int err = MP_OKAY; @@ -11818,8 +11707,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { (void)sp_4096_sub_39(tmpa, tmpa, tmpb); - sp_4096_mask_39(tmp, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); - (void)sp_4096_add_39(tmpa, tmpa, tmp); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); + sp_4096_cond_add_39(tmpa, tmpa, p, 0 - ((sp_int_digit)tmpa[38] >> 63)); sp_4096_mul_39(tmpa, tmpa, qi); err = sp_4096_mod_39(tmpa, tmpa, p); } diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 39a27d630..bd4a01482 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -4112,6 +4112,44 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #128\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "adds r5, %[c], #-1\n\t" + "ldr r5, [%[a], r8]\n\t" + "adcs r5, r5, r6\n\t" + "mov %[c], #0\n\t" + "adcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
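The Thumb and Cortex-M loops above cannot keep the hardware carry flag alive across the loop-control instructions, so the carry is parked in %[c] as 0 or 1 and re-seeded each iteration by adding it to -1: adds r5, %[c], #-1 sets the carry flag exactly when %[c] was 1, and the r5 result is immediately overwritten by the following ldr, so only the flag matters. A tiny self-contained check of that trick, purely illustrative:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t parked;
        for (parked = 0; parked <= 1; parked++) {
            /* The hardware carry flag is the bit that falls out of the
             * 32-bit addition of the parked carry and 0xFFFFFFFF (-1). */
            uint64_t sum = (uint64_t)parked + 0xFFFFFFFFu;
            int carry_flag = (int)(sum >> 32);
            assert(carry_flag == (int)parked);
        }
        return 0;
    }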
@@ -4146,7 +4184,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -4176,8 +4213,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 32; tmpb = tmpa + 64; - tmp = t; - r = tmp + 64; + r = t + 64; } #else r = a = ad; @@ -4186,7 +4222,6 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 64; #endif if (err == MP_OKAY) { @@ -4204,8 +4239,8 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_2048_sub_in_place_32(tmpa, tmpb); - sp_2048_mask_32(tmp, p, c); - sp_2048_add_32(tmpa, tmpa, tmp); + c += sp_2048_cond_add_32(tmpa, tmpa, p, c); + sp_2048_cond_add_32(tmpa, tmpa, p, c); sp_2048_from_mp(qi, 32, qim); sp_2048_mul_32(tmpa, tmpa, qi); @@ -4236,6 +4271,7 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -8666,6 +8702,44 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #192\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "adds r5, %[c], #-1\n\t" + "ldr r5, [%[a], r8]\n\t" + "adcs r5, r5, r6\n\t" + "mov %[c], #0\n\t" + "adcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
@@ -8700,7 +8774,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -8730,8 +8803,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 48; tmpb = tmpa + 96; - tmp = t; - r = tmp + 96; + r = t + 96; } #else r = a = ad; @@ -8740,7 +8812,6 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 96; #endif if (err == MP_OKAY) { @@ -8758,8 +8829,8 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_3072_sub_in_place_48(tmpa, tmpb); - sp_3072_mask_48(tmp, p, c); - sp_3072_add_48(tmpa, tmpa, tmp); + c += sp_3072_cond_add_48(tmpa, tmpa, p, c); + sp_3072_cond_add_48(tmpa, tmpa, p, c); sp_3072_from_mp(qi, 48, qim); sp_3072_mul_48(tmpa, tmpa, qi); @@ -8790,6 +8861,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -12166,6 +12238,45 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; } +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +/* Conditionally add a and b using the mask m. + * m is -1 to add and 0 when not. + * + * r A single precision number representing conditional add result. + * a A single precision number to add with. + * b A single precision number to add. + * m Mask value to apply. + */ +SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b, + sp_digit m) +{ + sp_digit c = 0; + + __asm__ __volatile__ ( + "mov r5, #1\n\t" + "lsl r5, r5, #8\n\t" + "mov r9, r5\n\t" + "mov r8, #0\n\t" + "\n1:\n\t" + "ldr r6, [%[b], r8]\n\t" + "and r6, r6, %[m]\n\t" + "adds r5, %[c], #-1\n\t" + "ldr r5, [%[a], r8]\n\t" + "adcs r5, r5, r6\n\t" + "mov %[c], #0\n\t" + "adcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" + "add r8, r8, #4\n\t" + "cmp r8, r9\n\t" + "blt 1b\n\t" + : [c] "+r" (c) + : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) + : "memory", "r5", "r6", "r8", "r9" + ); + + return c; +} + /* RSA private key operation. * * in Array of bytes representing the number to exponentiate, base. 
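The cond_add helpers added for each size above all implement the same primitive: add b under an all-ones mask, digit by digit, propagating the carry, with no branch on the data. A portable sketch assuming 32-bit digits follows; the function name cond_add_sketch and the word-count parameter are illustrative and not part of the patch.

#include <stdint.h>

/* Portable sketch (not the wolfSSL assembly) of the conditional add the new
 * sp_*_cond_add_* helpers perform: r = a + (b masked by m), with m either 0
 * or all-ones, returning the final carry. */
static uint32_t cond_add_sketch(uint32_t* r, const uint32_t* a,
                                const uint32_t* b, uint32_t m, int words)
{
    uint32_t c = 0;
    int i;

    for (i = 0; i < words; i++) {
        /* b[i] & m selects b[i] or 0 without a data-dependent branch. */
        uint64_t t = (uint64_t)a[i] + (b[i] & m) + c;
        r[i] = (uint32_t)t;
        c = (uint32_t)(t >> 32);
    }
    return c;
}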
@@ -12200,7 +12311,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, sp_digit* dp; sp_digit* dq; sp_digit* qi; - sp_digit* tmp; sp_digit* tmpa; sp_digit* tmpb; sp_digit* r; @@ -12230,8 +12340,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, tmpa = qi + 64; tmpb = tmpa + 128; - tmp = t; - r = tmp + 128; + r = t + 128; } #else r = a = ad; @@ -12240,7 +12349,6 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, qi = dq = dp = dpd; tmpa = tmpad; tmpb = tmpbd; - tmp = a + 128; #endif if (err == MP_OKAY) { @@ -12258,8 +12366,8 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, if (err == MP_OKAY) { c = sp_4096_sub_in_place_64(tmpa, tmpb); - sp_4096_mask_64(tmp, p, c); - sp_4096_add_64(tmpa, tmpa, tmp); + c += sp_4096_cond_add_64(tmpa, tmpa, p, c); + sp_4096_cond_add_64(tmpa, tmpa, p, c); sp_4096_from_mp(qi, 64, qim); sp_4096_mul_64(tmpa, tmpa, qi); @@ -12290,6 +12398,7 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, return err; } +#endif /* WOLFSSL_RSA_PUBLIC_ONLY */ #endif /* WOLFSSL_HAVE_SP_RSA */ #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) @@ -21053,26 +21162,24 @@ SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, c sp_digit c = 0; __asm__ __volatile__ ( - "mov r10, #0\n\t" - "mov r6, #48\n\t" - "sub r10, #1\n\t" - "mov r9, r6\n\t" + "mov r5, #48\n\t" + "mov r9, r5\n\t" "mov r8, #0\n\t" "\n1:\n\t" - "ldr r5, [%[a], r8]\n\t" "ldr r6, [%[b], r8]\n\t" "and r6, r6, %[m]\n\t" - "adds %[c], %[c], r10\n\t" + "adds r5, %[c], #-1\n\t" + "ldr r5, [%[a], r8]\n\t" "adcs r5, r5, r6\n\t" - "str r5, [%[r], r8]\n\t" "mov %[c], #0\n\t" - "adc %[c], %[c], %[c]\n\t" + "adcs %[c], %[c], %[c]\n\t" + "str r5, [%[r], r8]\n\t" "add r8, r8, #4\n\t" "cmp r8, r9\n\t" "blt 1b\n\t" : [c] "+r" (c) : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m) - : "memory", "r5", "r6", "r8", "r9", "r10" + : "memory", "r5", "r6", "r8", "r9" ); return c; diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index f9fb6d179..75dcfd78d 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -1573,13 +1573,13 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, mp_int* dm, #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { c += sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c); - c += sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c); + sp_2048_cond_add_avx2_16(tmpa, tmpa, p, c); } else #endif { c += sp_2048_cond_add_16(tmpa, tmpa, p, c); - c += sp_2048_cond_add_16(tmpa, tmpa, p, c); + sp_2048_cond_add_16(tmpa, tmpa, p, c); } sp_2048_from_mp(qi, 16, qim); @@ -3627,13 +3627,13 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { c += sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c); - c += sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c); + sp_3072_cond_add_avx2_24(tmpa, tmpa, p, c); } else #endif { c += sp_3072_cond_add_24(tmpa, tmpa, p, c); - c += sp_3072_cond_add_24(tmpa, tmpa, p, c); + sp_3072_cond_add_24(tmpa, tmpa, p, c); } sp_3072_from_mp(qi, 24, qim); @@ -5135,13 +5135,13 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, mp_int* dm, #ifdef HAVE_INTEL_AVX2 if (IS_INTEL_BMI2(cpuid_flags) && IS_INTEL_ADX(cpuid_flags)) { c += sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c); - c += sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c); + sp_4096_cond_add_avx2_32(tmpa, tmpa, p, c); } else #endif { c += 
sp_4096_cond_add_32(tmpa, tmpa, p, c);
-            c += sp_4096_cond_add_32(tmpa, tmpa, p, c);
+            sp_4096_cond_add_32(tmpa, tmpa, p, c);
         }
 
         sp_2048_from_mp(qi, 32, qim);
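On x86_64 the second conditional add was already present; the hunks above only stop accumulating its carry, which is never used afterwards. The small self-contained check below (hypothetical small moduli, not wolfSSL code) exercises the invariant all of the ports now rely on: for a in [0, p) and b in [0, q) with q < 2*p, as holds for two primes of the same bit length, one conditional add of p is not always enough, but two always leave the difference in [0, p).

#include <stdint.h>
#include <stdio.h>

/* Exhaustive toy check of the double-conditional-add invariant. */
int main(void)
{
    const int64_t p = 11, q = 19;      /* hypothetical moduli with q < 2*p */
    int needed_second = 0;
    int64_t a, b;

    for (a = 0; a < p; a++) {
        for (b = 0; b < q; b++) {
            int64_t d = a - b;
            if (d < 0) d += p;                        /* first conditional add  */
            if (d < 0) { d += p; needed_second = 1; } /* second conditional add */
            if (d < 0 || d >= p) {
                printf("invariant violated: a=%lld b=%lld\n",
                       (long long)a, (long long)b);
                return 1;
            }
        }
    }
    printf("all differences reduced into [0, p); second add needed: %s\n",
           needed_second ? "yes" : "no");
    return 0;
}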