SP Aarch64 ECC P256: mont reduce fix

For Montgomery reduction of P256: don't overwrite x10 and x11 with the words
of mu << 32. x11 is needed later, and there are plenty of free registers to hold the shifted words instead.
This commit is contained in:
Sean Parkinson
2023-01-30 10:27:12 +10:00
committed by David Garske
parent 420f2f45c1
commit 7cdf5c7956

View File

@@ -22541,16 +22541,16 @@ SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const
"# - a[0] << 32 << 192\n\t" "# - a[0] << 32 << 192\n\t"
"# + (a[0] * 2) << 192\n\t" "# + (a[0] * 2) << 192\n\t"
"# a[0]-a[2] << 32\n\t" "# a[0]-a[2] << 32\n\t"
"extr x10, x10, x9, 32\n\t" "extr x22, x10, x9, 32\n\t"
"add x7, x11, x8\n\t" "add x7, x11, x8\n\t"
"extr x9, x9, x8, 32\n\t" "extr x21, x9, x8, 32\n\t"
"add x7, x7, x8\n\t" "add x7, x7, x8\n\t"
"# + a[0]-a[2] << 32 << 64\n\t" "# + a[0]-a[2] << 32 << 64\n\t"
"# - a[0] << 32 << 192\n\t" "# - a[0] << 32 << 192\n\t"
"adds x5, x5, x8, lsl #32\n\t" "adds x5, x5, x8, lsl #32\n\t"
"sub x7, x7, x8, lsl #32\n\t" "sub x7, x7, x8, lsl #32\n\t"
"adcs x6, x6, x9\n\t" "adcs x6, x6, x21\n\t"
"adc x7, x7, x10\n\t" "adc x7, x7, x22\n\t"
"# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
"# a += mu << 256\n\t" "# a += mu << 256\n\t"
"adds x12, x12, x4\n\t" "adds x12, x12, x4\n\t"
@@ -22689,16 +22689,16 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const
"# - a[0] << 32 << 192\n\t" "# - a[0] << 32 << 192\n\t"
"# + (a[0] * 2) << 192\n\t" "# + (a[0] * 2) << 192\n\t"
"# a[0]-a[2] << 32\n\t" "# a[0]-a[2] << 32\n\t"
"extr x10, x10, x9, 32\n\t" "extr x21, x10, x9, 32\n\t"
"add x6, x11, x8\n\t" "add x6, x11, x8\n\t"
"extr x9, x9, x8, 32\n\t" "extr x20, x9, x8, 32\n\t"
"add x6, x6, x8\n\t" "add x6, x6, x8\n\t"
"# + a[0]-a[2] << 32 << 64\n\t" "# + a[0]-a[2] << 32 << 64\n\t"
"# - a[0] << 32 << 192\n\t" "# - a[0] << 32 << 192\n\t"
"adds x4, x4, x8, lsl #32\n\t" "adds x4, x4, x8, lsl #32\n\t"
"sub x6, x6, x8, lsl #32\n\t" "sub x6, x6, x8, lsl #32\n\t"
"adcs x5, x5, x9\n\t" "adcs x5, x5, x20\n\t"
"adc x6, x6, x10\n\t" "adc x6, x6, x21\n\t"
"# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
"# a += mu << 256\n\t" "# a += mu << 256\n\t"
"adds x12, x12, x3\n\t" "adds x12, x12, x3\n\t"
@@ -22752,7 +22752,7 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const
"stp x14, x15, [%[r], 16]\n\t" "stp x14, x15, [%[r], 16]\n\t"
: :
: [r] "r" (r), [a] "r" (a) : [r] "r" (r), [a] "r" (a)
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "cc" : "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc"
); );
} }
@@ -22994,16 +22994,16 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
"# - a[0] << 32 << 192\n\t" "# - a[0] << 32 << 192\n\t"
"# + (a[0] * 2) << 192\n\t" "# + (a[0] * 2) << 192\n\t"
"# a[0]-a[2] << 32\n\t" "# a[0]-a[2] << 32\n\t"
"extr x12, x12, x11, 32\n\t" "extr x20, x12, x11, 32\n\t"
"add x6, x13, x10\n\t" "add x6, x13, x10\n\t"
"extr x11, x11, x10, 32\n\t" "extr x19, x11, x10, 32\n\t"
"add x6, x6, x10\n\t" "add x6, x6, x10\n\t"
"# + a[0]-a[2] << 32 << 64\n\t" "# + a[0]-a[2] << 32 << 64\n\t"
"# - a[0] << 32 << 192\n\t" "# - a[0] << 32 << 192\n\t"
"adds x4, x4, x10, lsl #32\n\t" "adds x4, x4, x10, lsl #32\n\t"
"sub x6, x6, x10, lsl #32\n\t" "sub x6, x6, x10, lsl #32\n\t"
"adcs x5, x5, x11\n\t" "adcs x5, x5, x19\n\t"
"adc x6, x6, x12\n\t" "adc x6, x6, x20\n\t"
"# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t" "# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
"# a += mu << 256\n\t" "# a += mu << 256\n\t"
"adds x14, x14, x3\n\t" "adds x14, x14, x3\n\t"
@@ -23057,7 +23057,7 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
"stp x16, x17, [%[a], 16]\n\t" "stp x16, x17, [%[a], 16]\n\t"
: :
: [a] "r" (a), [m] "r" (m), [mp] "r" (mp) : [a] "r" (a), [m] "r" (m), [mp] "r" (mp)
: "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc" : "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x19", "x20", "cc"
); );
} }
/* Reduce the number back to 256 bits using Montgomery reduction. /* Reduce the number back to 256 bits using Montgomery reduction.