From 7cdf5c7956baf806004018f6af933074229e0cf0 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Mon, 30 Jan 2023 10:27:12 +1000 Subject: [PATCH] SP Aarch64 ECC P256: mont reduce fix For Montgomery Reduction of P256: Don't set x10 and x11 to words of mu << 32. x11 is needed later and there are plenty of registers. --- wolfcrypt/src/sp_arm64.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index cf97ace04..2a38fcd71 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -22541,16 +22541,16 @@ SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const "# - a[0] << 32 << 192\n\t" "# + (a[0] * 2) << 192\n\t" "# a[0]-a[2] << 32\n\t" - "extr x10, x10, x9, 32\n\t" + "extr x22, x10, x9, 32\n\t" "add x7, x11, x8\n\t" - "extr x9, x9, x8, 32\n\t" + "extr x21, x9, x8, 32\n\t" "add x7, x7, x8\n\t" "# + a[0]-a[2] << 32 << 64\n\t" "# - a[0] << 32 << 192\n\t" "adds x5, x5, x8, lsl #32\n\t" "sub x7, x7, x8, lsl #32\n\t" - "adcs x6, x6, x9\n\t" - "adc x7, x7, x10\n\t" + "adcs x6, x6, x21\n\t" + "adc x7, x7, x22\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" "# a += mu << 256\n\t" "adds x12, x12, x4\n\t" @@ -22689,16 +22689,16 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const "# - a[0] << 32 << 192\n\t" "# + (a[0] * 2) << 192\n\t" "# a[0]-a[2] << 32\n\t" - "extr x10, x10, x9, 32\n\t" + "extr x21, x10, x9, 32\n\t" "add x6, x11, x8\n\t" - "extr x9, x9, x8, 32\n\t" + "extr x20, x9, x8, 32\n\t" "add x6, x6, x8\n\t" "# + a[0]-a[2] << 32 << 64\n\t" "# - a[0] << 32 << 192\n\t" "adds x4, x4, x8, lsl #32\n\t" "sub x6, x6, x8, lsl #32\n\t" - "adcs x5, x5, x9\n\t" - "adc x6, x6, x10\n\t" + "adcs x5, x5, x20\n\t" + "adc x6, x6, x21\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" "# a += mu << 256\n\t" "adds x12, x12, x3\n\t" @@ -22752,7 +22752,7 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const "stp x14, x15, [%[r], 16]\n\t" : : [r] "r" (r), [a] "r" (a) - : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "cc" + : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc" ); } @@ -22994,16 +22994,16 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m, "# - a[0] << 32 << 192\n\t" "# + (a[0] * 2) << 192\n\t" "# a[0]-a[2] << 32\n\t" - "extr x12, x12, x11, 32\n\t" + "extr x20, x12, x11, 32\n\t" "add x6, x13, x10\n\t" - "extr x11, x11, x10, 32\n\t" + "extr x19, x11, x10, 32\n\t" "add x6, x6, x10\n\t" "# + a[0]-a[2] << 32 << 64\n\t" "# - a[0] << 32 << 192\n\t" "adds x4, x4, x10, lsl #32\n\t" "sub x6, x6, x10, lsl #32\n\t" - "adcs x5, x5, x11\n\t" - "adc x6, x6, x12\n\t" + "adcs x5, x5, x19\n\t" + "adc x6, x6, x20\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" "# a += mu << 256\n\t" "adds x14, x14, x3\n\t" @@ -23057,7 +23057,7 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m, "stp x16, x17, [%[a], 16]\n\t" : : [a] "r" (a), [m] "r" (m), [mp] "r" (mp) - : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" + : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x19", "x20", "cc" ); } /* Reduce the number back to 256 bits using Montgomery reduction.