mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2025-07-30 02:37:28 +02:00
SP Aarch64 ECC P256: mont reduce fix
For Montgomery reduction of P256: don't overwrite x10 and x11 with the words of mu << 32. x11 is needed later, and there are plenty of spare registers available to hold the shifted words instead.
This commit is contained in:
committed by
David Garske
parent
420f2f45c1
commit
7cdf5c7956
@ -22541,16 +22541,16 @@ SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const
|
||||
"# - a[0] << 32 << 192\n\t"
|
||||
"# + (a[0] * 2) << 192\n\t"
|
||||
"# a[0]-a[2] << 32\n\t"
|
||||
"extr x10, x10, x9, 32\n\t"
|
||||
"extr x22, x10, x9, 32\n\t"
|
||||
"add x7, x11, x8\n\t"
|
||||
"extr x9, x9, x8, 32\n\t"
|
||||
"extr x21, x9, x8, 32\n\t"
|
||||
"add x7, x7, x8\n\t"
|
||||
"# + a[0]-a[2] << 32 << 64\n\t"
|
||||
"# - a[0] << 32 << 192\n\t"
|
||||
"adds x5, x5, x8, lsl #32\n\t"
|
||||
"sub x7, x7, x8, lsl #32\n\t"
|
||||
"adcs x6, x6, x9\n\t"
|
||||
"adc x7, x7, x10\n\t"
|
||||
"adcs x6, x6, x21\n\t"
|
||||
"adc x7, x7, x22\n\t"
|
||||
"# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
|
||||
"# a += mu << 256\n\t"
|
||||
"adds x12, x12, x4\n\t"
|
||||
@ -22689,16 +22689,16 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const
|
||||
"# - a[0] << 32 << 192\n\t"
|
||||
"# + (a[0] * 2) << 192\n\t"
|
||||
"# a[0]-a[2] << 32\n\t"
|
||||
"extr x10, x10, x9, 32\n\t"
|
||||
"extr x21, x10, x9, 32\n\t"
|
||||
"add x6, x11, x8\n\t"
|
||||
"extr x9, x9, x8, 32\n\t"
|
||||
"extr x20, x9, x8, 32\n\t"
|
||||
"add x6, x6, x8\n\t"
|
||||
"# + a[0]-a[2] << 32 << 64\n\t"
|
||||
"# - a[0] << 32 << 192\n\t"
|
||||
"adds x4, x4, x8, lsl #32\n\t"
|
||||
"sub x6, x6, x8, lsl #32\n\t"
|
||||
"adcs x5, x5, x9\n\t"
|
||||
"adc x6, x6, x10\n\t"
|
||||
"adcs x5, x5, x20\n\t"
|
||||
"adc x6, x6, x21\n\t"
|
||||
"# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
|
||||
"# a += mu << 256\n\t"
|
||||
"adds x12, x12, x3\n\t"
|
||||
@ -22752,7 +22752,7 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const
|
||||
"stp x14, x15, [%[r], 16]\n\t"
|
||||
:
|
||||
: [r] "r" (r), [a] "r" (a)
|
||||
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "cc"
|
||||
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
@ -22994,16 +22994,16 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
|
||||
"# - a[0] << 32 << 192\n\t"
|
||||
"# + (a[0] * 2) << 192\n\t"
|
||||
"# a[0]-a[2] << 32\n\t"
|
||||
"extr x12, x12, x11, 32\n\t"
|
||||
"extr x20, x12, x11, 32\n\t"
|
||||
"add x6, x13, x10\n\t"
|
||||
"extr x11, x11, x10, 32\n\t"
|
||||
"extr x19, x11, x10, 32\n\t"
|
||||
"add x6, x6, x10\n\t"
|
||||
"# + a[0]-a[2] << 32 << 64\n\t"
|
||||
"# - a[0] << 32 << 192\n\t"
|
||||
"adds x4, x4, x10, lsl #32\n\t"
|
||||
"sub x6, x6, x10, lsl #32\n\t"
|
||||
"adcs x5, x5, x11\n\t"
|
||||
"adc x6, x6, x12\n\t"
|
||||
"adcs x5, x5, x19\n\t"
|
||||
"adc x6, x6, x20\n\t"
|
||||
"# a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu\n\t"
|
||||
"# a += mu << 256\n\t"
|
||||
"adds x14, x14, x3\n\t"
|
||||
@ -23057,7 +23057,7 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
|
||||
"stp x16, x17, [%[a], 16]\n\t"
|
||||
:
|
||||
: [a] "r" (a), [m] "r" (m), [mp] "r" (mp)
|
||||
: "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "cc"
|
||||
: "memory", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x19", "x20", "cc"
|
||||
);
|
||||
}
|
||||
/* Reduce the number back to 256 bits using Montgomery reduction.
|
||||
|
Reference in New Issue
Block a user