diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 46b080ca0..ebebe2a55 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -2503,7 +2503,6 @@ SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, "ldp x23, x24, [%[m], 64]\n\t" "ldp x25, x26, [%[m], 80]\n\t" "ldp x27, x28, [%[m], 96]\n\t" - "ldp x29, x30, [%[m], 112]\n\t" "# i = 16\n\t" "mov x4, 16\n\t" "ldp x12, x13, [%[a], 0]\n\t" @@ -2628,19 +2627,21 @@ SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, "# a[i+14] += m[14] * mu\n\t" "ldr x11, [%[a], 112]\n\t" "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" "adds x10, x10, x6\n\t" - "mul x7, x29, x9\n\t" + "mul x7, x8, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x29, x9\n\t" + "umulh x8, x8, x9\n\t" "str x10, [%[a], 104]\n\t" "adds x11, x11, x7\n\t" "# a[i+15] += m[15] * mu\n\t" "ldr x10, [%[a], 120]\n\t" "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" "adds x11, x11, x5\n\t" - "mul x7, x30, x9\n\t" + "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x30, x9\n\t" + "umulh x8, x8, x9\n\t" "adds x6, x6, x7\n\t" "adcs x8, x8, %[ca]\n\t" "str x11, [%[a], 112]\n\t" @@ -2657,7 +2658,7 @@ SP_NOINLINE static void sp_2048_mont_reduce_16(sp_digit* a, const sp_digit* m, "stp x12, x13, [%[a], 0]\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); sp_2048_cond_sub_16(a - 16, a, m, (sp_digit)0 - ca); @@ -3616,7 +3617,6 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "ldp x23, x24, [%[m], 64]\n\t" "ldp x25, x26, [%[m], 80]\n\t" "ldp x27, x28, [%[m], 96]\n\t" - "ldp x29, x30, [%[m], 112]\n\t" "# i = 32\n\t" "mov x4, 32\n\t" "ldp x12, x13, [%[a], 0]\n\t" @@ -3741,19 +3741,21 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "# a[i+14] += m[14] * mu\n\t" "ldr x11, [%[a], 112]\n\t" "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" "adds x10, x10, x6\n\t" - "mul x7, x29, x9\n\t" + "mul x7, x8, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x29, x9\n\t" + "umulh x8, x8, x9\n\t" "str x10, [%[a], 104]\n\t" "adds x11, x11, x7\n\t" "# a[i+15] += m[15] * mu\n\t" "ldr x10, [%[a], 120]\n\t" "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" "adds x11, x11, x5\n\t" - "mul x7, x30, x9\n\t" + "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x30, x9\n\t" + "umulh x8, x8, x9\n\t" "str x11, [%[a], 112]\n\t" "adds x10, x10, x7\n\t" "# a[i+16] += m[16] * mu\n\t" @@ -3930,7 +3932,7 @@ SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m, "stp x12, x13, [%[a], 0]\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca); @@ -9633,7 +9635,6 @@ SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, "ldp x23, x24, [%[m], 64]\n\t" "ldp x25, x26, [%[m], 80]\n\t" "ldp x27, x28, [%[m], 96]\n\t" - "ldp x29, x30, [%[m], 112]\n\t" "# i = 24\n\t" "mov x4, 24\n\t" "ldp x12, x13, [%[a], 0]\n\t" @@ -9758,19 +9759,21 @@ SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, "# a[i+14] += m[14] * mu\n\t" "ldr x11, [%[a], 112]\n\t" "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" "adds x10, x10, x6\n\t" - "mul x7, x29, x9\n\t" + "mul x7, x8, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x29, x9\n\t" + "umulh x8, x8, x9\n\t" "str x10, [%[a], 104]\n\t" "adds x11, x11, x7\n\t" "# a[i+15] += m[15] * mu\n\t" "ldr x10, [%[a], 120]\n\t" "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" "adds x11, x11, x5\n\t" - "mul x7, x30, x9\n\t" + "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x30, x9\n\t" + "umulh x8, x8, x9\n\t" "str x11, [%[a], 112]\n\t" "adds x10, x10, x7\n\t" "# a[i+16] += m[16] * mu\n\t" @@ -9867,7 +9870,7 @@ SP_NOINLINE static void sp_3072_mont_reduce_24(sp_digit* a, const sp_digit* m, "stp x12, x13, [%[a], 0]\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); sp_3072_cond_sub_24(a - 24, a, m, (sp_digit)0 - ca); @@ -11006,7 +11009,6 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "ldp x23, x24, [%[m], 64]\n\t" "ldp x25, x26, [%[m], 80]\n\t" "ldp x27, x28, [%[m], 96]\n\t" - "ldp x29, x30, [%[m], 112]\n\t" "# i = 48\n\t" "mov x4, 48\n\t" "ldp x12, x13, [%[a], 0]\n\t" @@ -11131,19 +11133,21 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "# a[i+14] += m[14] * mu\n\t" "ldr x11, [%[a], 112]\n\t" "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" "adds x10, x10, x6\n\t" - "mul x7, x29, x9\n\t" + "mul x7, x8, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x29, x9\n\t" + "umulh x8, x8, x9\n\t" "str x10, [%[a], 104]\n\t" "adds x11, x11, x7\n\t" "# a[i+15] += m[15] * mu\n\t" "ldr x10, [%[a], 120]\n\t" "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" "adds x11, x11, x5\n\t" - "mul x7, x30, x9\n\t" + "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x30, x9\n\t" + "umulh x8, x8, x9\n\t" "str x11, [%[a], 112]\n\t" "adds x10, x10, x7\n\t" "# a[i+16] += m[16] * mu\n\t" @@ -11480,7 +11484,7 @@ SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m, "stp x12, x13, [%[a], 0]\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca); @@ -15623,7 +15627,6 @@ SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, "ldp x23, x24, [%[m], 64]\n\t" "ldp x25, x26, [%[m], 80]\n\t" "ldp x27, x28, [%[m], 96]\n\t" - "ldp x29, x30, [%[m], 112]\n\t" "# i = 64\n\t" "mov x4, 64\n\t" "ldp x12, x13, [%[a], 0]\n\t" @@ -15748,19 +15751,21 @@ SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, "# a[i+14] += m[14] * mu\n\t" "ldr x11, [%[a], 112]\n\t" "adc x5, x8, xzr\n\t" + "ldr x8, [%[m], 112]\n\t" "adds x10, x10, x6\n\t" - "mul x7, x29, x9\n\t" + "mul x7, x8, x9\n\t" "adc x5, x5, xzr\n\t" - "umulh x8, x29, x9\n\t" + "umulh x8, x8, x9\n\t" "str x10, [%[a], 104]\n\t" "adds x11, x11, x7\n\t" "# a[i+15] += m[15] * mu\n\t" "ldr x10, [%[a], 120]\n\t" "adc x6, x8, xzr\n\t" + "ldr x8, [%[m], 120]\n\t" "adds x11, x11, x5\n\t" - "mul x7, x30, x9\n\t" + "mul x7, x8, x9\n\t" "adc x6, x6, xzr\n\t" - "umulh x8, x30, x9\n\t" + "umulh x8, x8, x9\n\t" "str x11, [%[a], 112]\n\t" "adds x10, x10, x7\n\t" "# a[i+16] += m[16] * mu\n\t" @@ -16257,7 +16262,7 @@ SP_NOINLINE static void sp_4096_mont_reduce_64(sp_digit* a, const sp_digit* m, "stp x12, x13, [%[a], 0]\n\t" : [ca] "+r" (ca), [a] "+r" (a) : [m] "r" (m), [mp] "r" (mp) - : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30" + : "memory", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" ); sp_4096_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);