forked from wolfSSL/wolfssl
Merge pull request #3621 from SparkiDev/sp_mac_arm64
SP arm64 MAC: stop the non-constant-time modular inverse from using x29
This commit is contained in:
@@ -37361,7 +37361,7 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a,
|
|||||||
"ldr x20, [%[m], 0]\n\t"
|
"ldr x20, [%[m], 0]\n\t"
|
||||||
"ldr x21, [%[m], 8]\n\t"
|
"ldr x21, [%[m], 8]\n\t"
|
||||||
"ldr x22, [%[m], 16]\n\t"
|
"ldr x22, [%[m], 16]\n\t"
|
||||||
"ldr x23, [%[m], 24]\n\t"
|
"ldr %[m], [%[m], 24]\n\t"
|
||||||
"ldr x7, [%[a], 0]\n\t"
|
"ldr x7, [%[a], 0]\n\t"
|
||||||
"ldr x8, [%[a], 8]\n\t"
|
"ldr x8, [%[a], 8]\n\t"
|
||||||
"ldr x9, [%[a], 16]\n\t"
|
"ldr x9, [%[a], 16]\n\t"
|
||||||
@@ -37369,7 +37369,7 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a,
|
|||||||
"mov x3, x20\n\t"
|
"mov x3, x20\n\t"
|
||||||
"mov x4, x21\n\t"
|
"mov x4, x21\n\t"
|
||||||
"mov x5, x22\n\t"
|
"mov x5, x22\n\t"
|
||||||
"mov x6, x23\n\t"
|
"mov x6, %[m]\n\t"
|
||||||
"mov x11, xzr\n\t"
|
"mov x11, xzr\n\t"
|
||||||
"mov x12, xzr\n\t"
|
"mov x12, xzr\n\t"
|
||||||
"mov x13, xzr\n\t"
|
"mov x13, xzr\n\t"
|
||||||
@@ -37380,89 +37380,89 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a,
|
|||||||
"mov x19, xzr\n\t"
|
"mov x19, xzr\n\t"
|
||||||
"cmp x6, 0\n\t"
|
"cmp x6, 0\n\t"
|
||||||
"b.eq 10f\n\t"
|
"b.eq 10f\n\t"
|
||||||
"mov x26, 256\n\t"
|
"mov x25, 256\n\t"
|
||||||
"clz x24, x6\n\t"
|
"clz x23, x6\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"b 13f\n\t"
|
"b 13f\n\t"
|
||||||
"\n10:\n\t"
|
"\n10:\n\t"
|
||||||
"cmp x5, 0\n\t"
|
"cmp x5, 0\n\t"
|
||||||
"b.eq 11f\n\t"
|
"b.eq 11f\n\t"
|
||||||
"mov x26, 192\n\t"
|
"mov x25, 192\n\t"
|
||||||
"clz x24, x5\n\t"
|
"clz x23, x5\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"b 13f\n\t"
|
"b 13f\n\t"
|
||||||
"\n11:\n\t"
|
"\n11:\n\t"
|
||||||
"cmp x4, 0\n\t"
|
"cmp x4, 0\n\t"
|
||||||
"b.eq 12f\n\t"
|
"b.eq 12f\n\t"
|
||||||
"mov x26, 128\n\t"
|
"mov x25, 128\n\t"
|
||||||
"clz x24, x4\n\t"
|
"clz x23, x4\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"b 13f\n\t"
|
"b 13f\n\t"
|
||||||
"\n12:\n\t"
|
"\n12:\n\t"
|
||||||
"mov x26, 64\n\t"
|
"mov x25, 64\n\t"
|
||||||
"clz x24, x3\n\t"
|
"clz x23, x3\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"\n13:\n\t"
|
"\n13:\n\t"
|
||||||
"cmp x10, 0\n\t"
|
"cmp x10, 0\n\t"
|
||||||
"b.eq 20f\n\t"
|
"b.eq 20f\n\t"
|
||||||
"mov x26, 256\n\t"
|
"mov x25, 256\n\t"
|
||||||
"clz x25, x10\n\t"
|
"clz x24, x10\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"b 23f\n\t"
|
"b 23f\n\t"
|
||||||
"\n20:\n\t"
|
"\n20:\n\t"
|
||||||
"cmp x9, 0\n\t"
|
"cmp x9, 0\n\t"
|
||||||
"b.eq 21f\n\t"
|
"b.eq 21f\n\t"
|
||||||
"mov x26, 192\n\t"
|
"mov x25, 192\n\t"
|
||||||
"clz x25, x9\n\t"
|
"clz x24, x9\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"b 23f\n\t"
|
"b 23f\n\t"
|
||||||
"\n21:\n\t"
|
"\n21:\n\t"
|
||||||
"cmp x8, 0\n\t"
|
"cmp x8, 0\n\t"
|
||||||
"b.eq 22f\n\t"
|
"b.eq 22f\n\t"
|
||||||
"mov x26, 128\n\t"
|
"mov x25, 128\n\t"
|
||||||
"clz x25, x8\n\t"
|
"clz x24, x8\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"b 23f\n\t"
|
"b 23f\n\t"
|
||||||
"\n22:\n\t"
|
"\n22:\n\t"
|
||||||
"mov x26, 64\n\t"
|
"mov x25, 64\n\t"
|
||||||
"clz x25, x7\n\t"
|
"clz x24, x7\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"\n23:\n\t"
|
"\n23:\n\t"
|
||||||
"tst x7, 1\n\t"
|
"tst x7, 1\n\t"
|
||||||
"b.ne 90f\n\t"
|
"b.ne 90f\n\t"
|
||||||
"\n1:\n\t"
|
"\n1:\n\t"
|
||||||
"lsr x7, x7, 1\n\t"
|
"lsr x7, x7, 1\n\t"
|
||||||
"lsr x27, x8, 1\n\t"
|
"lsr x26, x8, 1\n\t"
|
||||||
"lsr x28, x9, 1\n\t"
|
"lsr x27, x9, 1\n\t"
|
||||||
"orr x7, x7, x8, lsl 63\n\t"
|
"orr x7, x7, x8, lsl 63\n\t"
|
||||||
"orr x8, x27, x9, lsl 63\n\t"
|
"orr x8, x26, x9, lsl 63\n\t"
|
||||||
"orr x9, x28, x10, lsl 63\n\t"
|
"orr x9, x27, x10, lsl 63\n\t"
|
||||||
"lsr x10, x10, 1\n\t"
|
"lsr x10, x10, 1\n\t"
|
||||||
"sub x25, x25, 1\n\t"
|
"sub x24, x24, 1\n\t"
|
||||||
"ands x26, x15, 1\n\t"
|
"ands x25, x15, 1\n\t"
|
||||||
"b.eq 2f\n\t"
|
"b.eq 2f\n\t"
|
||||||
"adds x15, x15, x20\n\t"
|
"adds x15, x15, x20\n\t"
|
||||||
"adcs x16, x16, x21\n\t"
|
"adcs x16, x16, x21\n\t"
|
||||||
"adcs x17, x17, x22\n\t"
|
"adcs x17, x17, x22\n\t"
|
||||||
"adcs x19, x19, x23\n\t"
|
"adcs x19, x19, %[m]\n\t"
|
||||||
"cset x26, cs\n\t"
|
"cset x25, cs\n\t"
|
||||||
"\n2:\n\t"
|
"\n2:\n\t"
|
||||||
"lsr x15, x15, 1\n\t"
|
"lsr x15, x15, 1\n\t"
|
||||||
"lsr x27, x16, 1\n\t"
|
"lsr x26, x16, 1\n\t"
|
||||||
"lsr x28, x17, 1\n\t"
|
"lsr x27, x17, 1\n\t"
|
||||||
"lsr x29, x19, 1\n\t"
|
"lsr x28, x19, 1\n\t"
|
||||||
"orr x15, x15, x16, lsl 63\n\t"
|
"orr x15, x15, x16, lsl 63\n\t"
|
||||||
"orr x16, x27, x17, lsl 63\n\t"
|
"orr x16, x26, x17, lsl 63\n\t"
|
||||||
"orr x17, x28, x19, lsl 63\n\t"
|
"orr x17, x27, x19, lsl 63\n\t"
|
||||||
"orr x19, x29, x26, lsl 63\n\t"
|
"orr x19, x28, x25, lsl 63\n\t"
|
||||||
"tst x7, 1\n\t"
|
"tst x7, 1\n\t"
|
||||||
"b.eq 1b\n\t"
|
"b.eq 1b\n\t"
|
||||||
"\n90:\n\t"
|
"\n90:\n\t"
|
||||||
"cmp x24, 1\n\t"
|
"cmp x23, 1\n\t"
|
||||||
"b.eq 100f\n\t"
|
"b.eq 100f\n\t"
|
||||||
"cmp x25, 1\n\t"
|
"cmp x24, 1\n\t"
|
||||||
"b.eq 101f\n\t"
|
"b.eq 101f\n\t"
|
||||||
"cmp x24, x25\n\t"
|
"cmp x23, x24\n\t"
|
||||||
"b.hi 91f\n\t"
|
"b.hi 91f\n\t"
|
||||||
"b.cc 92f\n\t"
|
"b.cc 92f\n\t"
|
||||||
"cmp x6, x10\n\t"
|
"cmp x6, x10\n\t"
|
||||||
@@ -37489,58 +37489,58 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a,
|
|||||||
"adds x11, x11, x20\n\t"
|
"adds x11, x11, x20\n\t"
|
||||||
"adcs x12, x12, x21\n\t"
|
"adcs x12, x12, x21\n\t"
|
||||||
"adcs x13, x13, x22\n\t"
|
"adcs x13, x13, x22\n\t"
|
||||||
"adc x14, x14, x23\n\t"
|
"adc x14, x14, %[m]\n\t"
|
||||||
"\n30:\n\t"
|
"\n30:\n\t"
|
||||||
"cmp x6, 0\n\t"
|
"cmp x6, 0\n\t"
|
||||||
"b.eq 40f\n\t"
|
"b.eq 40f\n\t"
|
||||||
"mov x26, 256\n\t"
|
"mov x25, 256\n\t"
|
||||||
"clz x24, x6\n\t"
|
"clz x23, x6\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"b 43f\n\t"
|
"b 43f\n\t"
|
||||||
"\n40:\n\t"
|
"\n40:\n\t"
|
||||||
"cmp x5, 0\n\t"
|
"cmp x5, 0\n\t"
|
||||||
"b.eq 41f\n\t"
|
"b.eq 41f\n\t"
|
||||||
"mov x26, 192\n\t"
|
"mov x25, 192\n\t"
|
||||||
"clz x24, x5\n\t"
|
"clz x23, x5\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"b 43f\n\t"
|
"b 43f\n\t"
|
||||||
"\n41:\n\t"
|
"\n41:\n\t"
|
||||||
"cmp x4, 0\n\t"
|
"cmp x4, 0\n\t"
|
||||||
"b.eq 42f\n\t"
|
"b.eq 42f\n\t"
|
||||||
"mov x26, 128\n\t"
|
"mov x25, 128\n\t"
|
||||||
"clz x24, x4\n\t"
|
"clz x23, x4\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"b 43f\n\t"
|
"b 43f\n\t"
|
||||||
"\n42:\n\t"
|
"\n42:\n\t"
|
||||||
"mov x26, 64\n\t"
|
"mov x25, 64\n\t"
|
||||||
"clz x24, x3\n\t"
|
"clz x23, x3\n\t"
|
||||||
"sub x24, x26, x24\n\t"
|
"sub x23, x25, x23\n\t"
|
||||||
"\n43:\n\t"
|
"\n43:\n\t"
|
||||||
"\n50:\n\t"
|
"\n50:\n\t"
|
||||||
"lsr x3, x3, 1\n\t"
|
"lsr x3, x3, 1\n\t"
|
||||||
"lsr x27, x4, 1\n\t"
|
"lsr x26, x4, 1\n\t"
|
||||||
"lsr x28, x5, 1\n\t"
|
"lsr x27, x5, 1\n\t"
|
||||||
"orr x3, x3, x4, lsl 63\n\t"
|
"orr x3, x3, x4, lsl 63\n\t"
|
||||||
"orr x4, x27, x5, lsl 63\n\t"
|
"orr x4, x26, x5, lsl 63\n\t"
|
||||||
"orr x5, x28, x6, lsl 63\n\t"
|
"orr x5, x27, x6, lsl 63\n\t"
|
||||||
"lsr x6, x6, 1\n\t"
|
"lsr x6, x6, 1\n\t"
|
||||||
"sub x24, x24, 1\n\t"
|
"sub x23, x23, 1\n\t"
|
||||||
"ands x26, x11, 1\n\t"
|
"ands x25, x11, 1\n\t"
|
||||||
"b.eq 51f\n\t"
|
"b.eq 51f\n\t"
|
||||||
"adds x11, x11, x20\n\t"
|
"adds x11, x11, x20\n\t"
|
||||||
"adcs x12, x12, x21\n\t"
|
"adcs x12, x12, x21\n\t"
|
||||||
"adcs x13, x13, x22\n\t"
|
"adcs x13, x13, x22\n\t"
|
||||||
"adcs x14, x14, x23\n\t"
|
"adcs x14, x14, %[m]\n\t"
|
||||||
"cset x26, cs\n\t"
|
"cset x25, cs\n\t"
|
||||||
"\n51:\n\t"
|
"\n51:\n\t"
|
||||||
"lsr x11, x11, 1\n\t"
|
"lsr x11, x11, 1\n\t"
|
||||||
"lsr x27, x12, 1\n\t"
|
"lsr x26, x12, 1\n\t"
|
||||||
"lsr x28, x13, 1\n\t"
|
"lsr x27, x13, 1\n\t"
|
||||||
"lsr x29, x14, 1\n\t"
|
"lsr x28, x14, 1\n\t"
|
||||||
"orr x11, x11, x12, lsl 63\n\t"
|
"orr x11, x11, x12, lsl 63\n\t"
|
||||||
"orr x12, x27, x13, lsl 63\n\t"
|
"orr x12, x26, x13, lsl 63\n\t"
|
||||||
"orr x13, x28, x14, lsl 63\n\t"
|
"orr x13, x27, x14, lsl 63\n\t"
|
||||||
"orr x14, x29, x26, lsl 63\n\t"
|
"orr x14, x28, x25, lsl 63\n\t"
|
||||||
"tst x3, 1\n\t"
|
"tst x3, 1\n\t"
|
||||||
"b.eq 50b\n\t"
|
"b.eq 50b\n\t"
|
||||||
"b 90b\n\t"
|
"b 90b\n\t"
|
||||||
@@ -37557,58 +37557,58 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a,
|
|||||||
"adds x15, x15, x20\n\t"
|
"adds x15, x15, x20\n\t"
|
||||||
"adcs x16, x16, x21\n\t"
|
"adcs x16, x16, x21\n\t"
|
||||||
"adcs x17, x17, x22\n\t"
|
"adcs x17, x17, x22\n\t"
|
||||||
"adc x19, x19, x23\n\t"
|
"adc x19, x19, %[m]\n\t"
|
||||||
"\n60:\n\t"
|
"\n60:\n\t"
|
||||||
"cmp x10, 0\n\t"
|
"cmp x10, 0\n\t"
|
||||||
"b.eq 70f\n\t"
|
"b.eq 70f\n\t"
|
||||||
"mov x26, 256\n\t"
|
"mov x25, 256\n\t"
|
||||||
"clz x25, x10\n\t"
|
"clz x24, x10\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"b 73f\n\t"
|
"b 73f\n\t"
|
||||||
"\n70:\n\t"
|
"\n70:\n\t"
|
||||||
"cmp x9, 0\n\t"
|
"cmp x9, 0\n\t"
|
||||||
"b.eq 71f\n\t"
|
"b.eq 71f\n\t"
|
||||||
"mov x26, 192\n\t"
|
"mov x25, 192\n\t"
|
||||||
"clz x25, x9\n\t"
|
"clz x24, x9\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"b 73f\n\t"
|
"b 73f\n\t"
|
||||||
"\n71:\n\t"
|
"\n71:\n\t"
|
||||||
"cmp x8, 0\n\t"
|
"cmp x8, 0\n\t"
|
||||||
"b.eq 72f\n\t"
|
"b.eq 72f\n\t"
|
||||||
"mov x26, 128\n\t"
|
"mov x25, 128\n\t"
|
||||||
"clz x25, x8\n\t"
|
"clz x24, x8\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"b 73f\n\t"
|
"b 73f\n\t"
|
||||||
"\n72:\n\t"
|
"\n72:\n\t"
|
||||||
"mov x26, 64\n\t"
|
"mov x25, 64\n\t"
|
||||||
"clz x25, x7\n\t"
|
"clz x24, x7\n\t"
|
||||||
"sub x25, x26, x25\n\t"
|
"sub x24, x25, x24\n\t"
|
||||||
"\n73:\n\t"
|
"\n73:\n\t"
|
||||||
"\n80:\n\t"
|
"\n80:\n\t"
|
||||||
"lsr x7, x7, 1\n\t"
|
"lsr x7, x7, 1\n\t"
|
||||||
"lsr x27, x8, 1\n\t"
|
"lsr x26, x8, 1\n\t"
|
||||||
"lsr x28, x9, 1\n\t"
|
"lsr x27, x9, 1\n\t"
|
||||||
"orr x7, x7, x8, lsl 63\n\t"
|
"orr x7, x7, x8, lsl 63\n\t"
|
||||||
"orr x8, x27, x9, lsl 63\n\t"
|
"orr x8, x26, x9, lsl 63\n\t"
|
||||||
"orr x9, x28, x10, lsl 63\n\t"
|
"orr x9, x27, x10, lsl 63\n\t"
|
||||||
"lsr x10, x10, 1\n\t"
|
"lsr x10, x10, 1\n\t"
|
||||||
"sub x25, x25, 1\n\t"
|
"sub x24, x24, 1\n\t"
|
||||||
"ands x26, x15, 1\n\t"
|
"ands x25, x15, 1\n\t"
|
||||||
"b.eq 81f\n\t"
|
"b.eq 81f\n\t"
|
||||||
"adds x15, x15, x20\n\t"
|
"adds x15, x15, x20\n\t"
|
||||||
"adcs x16, x16, x21\n\t"
|
"adcs x16, x16, x21\n\t"
|
||||||
"adcs x17, x17, x22\n\t"
|
"adcs x17, x17, x22\n\t"
|
||||||
"adcs x19, x19, x23\n\t"
|
"adcs x19, x19, %[m]\n\t"
|
||||||
"cset x26, cs\n\t"
|
"cset x25, cs\n\t"
|
||||||
"\n81:\n\t"
|
"\n81:\n\t"
|
||||||
"lsr x15, x15, 1\n\t"
|
"lsr x15, x15, 1\n\t"
|
||||||
"lsr x27, x16, 1\n\t"
|
"lsr x26, x16, 1\n\t"
|
||||||
"lsr x28, x17, 1\n\t"
|
"lsr x27, x17, 1\n\t"
|
||||||
"lsr x29, x19, 1\n\t"
|
"lsr x28, x19, 1\n\t"
|
||||||
"orr x15, x15, x16, lsl 63\n\t"
|
"orr x15, x15, x16, lsl 63\n\t"
|
||||||
"orr x16, x27, x17, lsl 63\n\t"
|
"orr x16, x26, x17, lsl 63\n\t"
|
||||||
"orr x17, x28, x19, lsl 63\n\t"
|
"orr x17, x27, x19, lsl 63\n\t"
|
||||||
"orr x19, x29, x26, lsl 63\n\t"
|
"orr x19, x28, x25, lsl 63\n\t"
|
||||||
"tst x7, 1\n\t"
|
"tst x7, 1\n\t"
|
||||||
"b.eq 80b\n\t"
|
"b.eq 80b\n\t"
|
||||||
"b 90b\n\t"
|
"b 90b\n\t"
|
||||||
@@ -37624,9 +37624,9 @@ static int sp_256_mod_inv_4(sp_digit* r, const sp_digit* a,
|
|||||||
"str x17, [%[r], 16]\n\t"
|
"str x17, [%[r], 16]\n\t"
|
||||||
"str x19, [%[r], 24]\n\t"
|
"str x19, [%[r], 24]\n\t"
|
||||||
"\n102:\n\t"
|
"\n102:\n\t"
|
||||||
:
|
: [m] "+r" (m)
|
||||||
: [r] "r" (r), [a] "r" (a), [m] "r" (m)
|
: [r] "r" (r), [a] "r" (a)
|
||||||
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29"
|
: "memory", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28"
|
||||||
);
|
);
|
||||||
|
|
||||||
return MP_OKAY;
|
return MP_OKAY;
|
||||||
|
Reference in New Issue
Block a user