SP EC ASM: mod_mul_norm fix

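Handle the corner case where the overflow/underflow correction itself carries or borrows out of the last (most-significant) 32-bit word: capture that carry/borrow and apply a second correction pass.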
This commit is contained in:
Sean Parkinson
2023-01-24 09:29:32 +10:00
parent 4592f1a5b4
commit e34027ec76
5 changed files with 91 additions and 5 deletions

View File

@ -65130,7 +65130,9 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di
"adcs r6, r6, #0\n\t"
"adcs r7, r7, #0\n\t"
"adcs r8, r8, r10\n\t"
"adc lr, lr, r12\n\t"
"adcs lr, lr, r12\n\t"
"mov r9, #0\n\t"
"adc r9, r9, #0\n\t"
/* Subtract overflow */
/* Add underflow - subtract neg underflow */
"subs r2, r2, r10\n\t"
@ -65140,6 +65142,29 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, #0\n\t"
"sbcs r8, r8, r12\n\t"
"sbcs lr, lr, r10\n\t"
"mov r10, #0\n\t"
"sbc r10, r10, #0\n\t"
"neg r10, r10\n\t"
/* Add overflow */
/* Subtract underflow - add neg underflow */
"adds r2, r2, r9\n\t"
"adcs r3, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"adcs r5, r5, r10\n\t"
"adcs r6, r6, #0\n\t"
"adcs r7, r7, #0\n\t"
"adcs r8, r8, r10\n\t"
"adc lr, lr, r9\n\t"
/* Subtract overflow */
/* Add underflow - subtract neg underflow */
"subs r2, r2, r10\n\t"
"sbcs r3, r3, #0\n\t"
"sbcs r4, r4, #0\n\t"
"sbcs r5, r5, r9\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r7, r7, #0\n\t"
"sbcs r8, r8, r9\n\t"
"sbc lr, lr, r10\n\t"
/* Store result */
"stm %[r], {r2, r3, r4, r5, r6, r7, r8, lr}\n\t"

View File

@ -22172,6 +22172,18 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit*
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
o = t[7] >> 32; t[7] &= 0xffffffff;
t[0] += o;
t[3] -= o;
t[6] -= o;
t[7] += o;
t[1] += t[0] >> 32; t[0] &= 0xffffffff;
t[2] += t[1] >> 32; t[1] &= 0xffffffff;
t[3] += t[2] >> 32; t[2] &= 0xffffffff;
t[4] += t[3] >> 32; t[3] &= 0xffffffff;
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
r[0] = (t[1] << 32) | t[0];
r[1] = (t[3] << 32) | t[2];
r[2] = (t[5] << 32) | t[4];

View File

@ -97775,6 +97775,18 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit*
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
o = t[7] >> 32; t[7] &= 0xffffffff;
t[0] += o;
t[3] -= o;
t[6] -= o;
t[7] += o;
t[1] += t[0] >> 32; t[0] &= 0xffffffff;
t[2] += t[1] >> 32; t[1] &= 0xffffffff;
t[3] += t[2] >> 32; t[2] &= 0xffffffff;
t[4] += t[3] >> 32; t[3] &= 0xffffffff;
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
r[0] = (sp_digit)t[0];
r[1] = (sp_digit)t[1];
r[2] = (sp_digit)t[2];

View File

@ -16951,7 +16951,9 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit*
"adcs r6, r6, #0\n\t"
"adcs r8, r8, #0\n\t"
"adcs r9, r9, r12\n\t"
"adc r14, r14, r11\n\t"
"adcs r14, r14, r11\n\t"
"mov r10, #0\n\t"
"adc r10, r10, #0\n\t"
/* Subtract overflow */
/* Add underflow - subtract neg underflow */
"subs r2, r2, r12\n\t"
@ -16961,6 +16963,29 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit*
"sbcs r6, r6, #0\n\t"
"sbcs r8, r8, #0\n\t"
"sbcs r9, r9, r11\n\t"
"sbcs r14, r14, r12\n\t"
"mov r12, #0\n\t"
"sbc r12, r12, #0\n\t"
"neg r12, r12\n\t"
/* Add overflow */
/* Subtract underflow - add neg underflow */
"adds r2, r2, r10\n\t"
"adcs r3, r3, #0\n\t"
"adcs r4, r4, #0\n\t"
"adds r5, r5, r12\n\t"
"adcs r6, r6, #0\n\t"
"adcs r8, r8, #0\n\t"
"adcs r9, r9, r12\n\t"
"adc r14, r14, r10\n\t"
/* Subtract overflow */
/* Add underflow - subtract neg underflow */
"subs r2, r2, r12\n\t"
"sbcs r3, r3, #0\n\t"
"sbcs r4, r4, #0\n\t"
"subs r5, r5, r10\n\t"
"sbcs r6, r6, #0\n\t"
"sbcs r8, r8, #0\n\t"
"sbcs r9, r9, r10\n\t"
"sbc r14, r14, r12\n\t"
/* Store result */
"str r2, [%[r], #0]\n\t"

View File

@ -8158,6 +8158,18 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit*
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
o = t[7] >> 32; t[7] &= 0xffffffff;
t[0] += o;
t[3] -= o;
t[6] -= o;
t[7] += o;
t[1] += t[0] >> 32; t[0] &= 0xffffffff;
t[2] += t[1] >> 32; t[1] &= 0xffffffff;
t[3] += t[2] >> 32; t[2] &= 0xffffffff;
t[4] += t[3] >> 32; t[3] &= 0xffffffff;
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
r[0] = (t[1] << 32) | t[0];
r[1] = (t[3] << 32) | t[2];
r[2] = (t[5] << 32) | t[4];