mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2025-07-30 18:57:27 +02:00
SP EC ASM: mod_mul_norm fix
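Handle the corner case where a carry or borrow out of the last (most significant) 32-bit word remains after reduction: fold it back into the lower words and propagate the carries again.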
This commit is contained in:
@ -65130,7 +65130,9 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di
|
|||||||
"adcs r6, r6, #0\n\t"
|
"adcs r6, r6, #0\n\t"
|
||||||
"adcs r7, r7, #0\n\t"
|
"adcs r7, r7, #0\n\t"
|
||||||
"adcs r8, r8, r10\n\t"
|
"adcs r8, r8, r10\n\t"
|
||||||
"adc lr, lr, r12\n\t"
|
"adcs lr, lr, r12\n\t"
|
||||||
|
"mov r9, #0\n\t"
|
||||||
|
"adc r9, r9, #0\n\t"
|
||||||
/* Subtract overflow */
|
/* Subtract overflow */
|
||||||
/* Add underflow - subtract neg underflow */
|
/* Add underflow - subtract neg underflow */
|
||||||
"subs r2, r2, r10\n\t"
|
"subs r2, r2, r10\n\t"
|
||||||
@ -65140,6 +65142,29 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di
|
|||||||
"sbcs r6, r6, #0\n\t"
|
"sbcs r6, r6, #0\n\t"
|
||||||
"sbcs r7, r7, #0\n\t"
|
"sbcs r7, r7, #0\n\t"
|
||||||
"sbcs r8, r8, r12\n\t"
|
"sbcs r8, r8, r12\n\t"
|
||||||
|
"sbcs lr, lr, r10\n\t"
|
||||||
|
"mov r10, #0\n\t"
|
||||||
|
"sbc r10, r10, #0\n\t"
|
||||||
|
"neg r10, r10\n\t"
|
||||||
|
/* Add overflow */
|
||||||
|
/* Subtract underflow - add neg underflow */
|
||||||
|
"adds r2, r2, r9\n\t"
|
||||||
|
"adcs r3, r3, #0\n\t"
|
||||||
|
"adcs r4, r4, #0\n\t"
|
||||||
|
"adcs r5, r5, r10\n\t"
|
||||||
|
"adcs r6, r6, #0\n\t"
|
||||||
|
"adcs r7, r7, #0\n\t"
|
||||||
|
"adcs r8, r8, r10\n\t"
|
||||||
|
"adc lr, lr, r9\n\t"
|
||||||
|
/* Subtract overflow */
|
||||||
|
/* Add underflow - subtract neg underflow */
|
||||||
|
"subs r2, r2, r10\n\t"
|
||||||
|
"sbcs r3, r3, #0\n\t"
|
||||||
|
"sbcs r4, r4, #0\n\t"
|
||||||
|
"sbcs r5, r5, r9\n\t"
|
||||||
|
"sbcs r6, r6, #0\n\t"
|
||||||
|
"sbcs r7, r7, #0\n\t"
|
||||||
|
"sbcs r8, r8, r9\n\t"
|
||||||
"sbc lr, lr, r10\n\t"
|
"sbc lr, lr, r10\n\t"
|
||||||
/* Store result */
|
/* Store result */
|
||||||
"stm %[r], {r2, r3, r4, r5, r6, r7, r8, lr}\n\t"
|
"stm %[r], {r2, r3, r4, r5, r6, r7, r8, lr}\n\t"
|
||||||
|
@ -22172,6 +22172,18 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit*
|
|||||||
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
||||||
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
||||||
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
||||||
|
o = t[7] >> 32; t[7] &= 0xffffffff;
|
||||||
|
t[0] += o;
|
||||||
|
t[3] -= o;
|
||||||
|
t[6] -= o;
|
||||||
|
t[7] += o;
|
||||||
|
t[1] += t[0] >> 32; t[0] &= 0xffffffff;
|
||||||
|
t[2] += t[1] >> 32; t[1] &= 0xffffffff;
|
||||||
|
t[3] += t[2] >> 32; t[2] &= 0xffffffff;
|
||||||
|
t[4] += t[3] >> 32; t[3] &= 0xffffffff;
|
||||||
|
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
||||||
|
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
||||||
|
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
||||||
r[0] = (t[1] << 32) | t[0];
|
r[0] = (t[1] << 32) | t[0];
|
||||||
r[1] = (t[3] << 32) | t[2];
|
r[1] = (t[3] << 32) | t[2];
|
||||||
r[2] = (t[5] << 32) | t[4];
|
r[2] = (t[5] << 32) | t[4];
|
||||||
|
@ -97775,6 +97775,18 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit*
|
|||||||
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
||||||
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
||||||
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
||||||
|
o = t[7] >> 32; t[7] &= 0xffffffff;
|
||||||
|
t[0] += o;
|
||||||
|
t[3] -= o;
|
||||||
|
t[6] -= o;
|
||||||
|
t[7] += o;
|
||||||
|
t[1] += t[0] >> 32; t[0] &= 0xffffffff;
|
||||||
|
t[2] += t[1] >> 32; t[1] &= 0xffffffff;
|
||||||
|
t[3] += t[2] >> 32; t[2] &= 0xffffffff;
|
||||||
|
t[4] += t[3] >> 32; t[3] &= 0xffffffff;
|
||||||
|
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
||||||
|
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
||||||
|
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
||||||
r[0] = (sp_digit)t[0];
|
r[0] = (sp_digit)t[0];
|
||||||
r[1] = (sp_digit)t[1];
|
r[1] = (sp_digit)t[1];
|
||||||
r[2] = (sp_digit)t[2];
|
r[2] = (sp_digit)t[2];
|
||||||
|
@ -16944,23 +16944,48 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit*
|
|||||||
"neg r12, r12\n\t"
|
"neg r12, r12\n\t"
|
||||||
/* Add overflow */
|
/* Add overflow */
|
||||||
/* Subtract underflow - add neg underflow */
|
/* Subtract underflow - add neg underflow */
|
||||||
"adds r2, r2, r11\n\t"
|
"adds r2, r2, r11\n\t"
|
||||||
|
"adcs r3, r3, #0\n\t"
|
||||||
|
"adcs r4, r4, #0\n\t"
|
||||||
|
"adds r5, r5, r12\n\t"
|
||||||
|
"adcs r6, r6, #0\n\t"
|
||||||
|
"adcs r8, r8, #0\n\t"
|
||||||
|
"adcs r9, r9, r12\n\t"
|
||||||
|
"adcs r14, r14, r11\n\t"
|
||||||
|
"mov r10, #0\n\t"
|
||||||
|
"adc r10, r10, #0\n\t"
|
||||||
|
/* Subtract overflow */
|
||||||
|
/* Add underflow - subtract neg underflow */
|
||||||
|
"subs r2, r2, r12\n\t"
|
||||||
|
"sbcs r3, r3, #0\n\t"
|
||||||
|
"sbcs r4, r4, #0\n\t"
|
||||||
|
"subs r5, r5, r11\n\t"
|
||||||
|
"sbcs r6, r6, #0\n\t"
|
||||||
|
"sbcs r8, r8, #0\n\t"
|
||||||
|
"sbcs r9, r9, r11\n\t"
|
||||||
|
"sbcs r14, r14, r12\n\t"
|
||||||
|
"mov r12, #0\n\t"
|
||||||
|
"sbc r12, r12, #0\n\t"
|
||||||
|
"neg r12, r12\n\t"
|
||||||
|
/* Add overflow */
|
||||||
|
/* Subtract underflow - add neg underflow */
|
||||||
|
"adds r2, r2, r10\n\t"
|
||||||
"adcs r3, r3, #0\n\t"
|
"adcs r3, r3, #0\n\t"
|
||||||
"adcs r4, r4, #0\n\t"
|
"adcs r4, r4, #0\n\t"
|
||||||
"adds r5, r5, r12\n\t"
|
"adds r5, r5, r12\n\t"
|
||||||
"adcs r6, r6, #0\n\t"
|
"adcs r6, r6, #0\n\t"
|
||||||
"adcs r8, r8, #0\n\t"
|
"adcs r8, r8, #0\n\t"
|
||||||
"adcs r9, r9, r12\n\t"
|
"adcs r9, r9, r12\n\t"
|
||||||
"adc r14, r14, r11\n\t"
|
"adc r14, r14, r10\n\t"
|
||||||
/* Subtract overflow */
|
/* Subtract overflow */
|
||||||
/* Add underflow - subtract neg underflow */
|
/* Add underflow - subtract neg underflow */
|
||||||
"subs r2, r2, r12\n\t"
|
"subs r2, r2, r12\n\t"
|
||||||
"sbcs r3, r3, #0\n\t"
|
"sbcs r3, r3, #0\n\t"
|
||||||
"sbcs r4, r4, #0\n\t"
|
"sbcs r4, r4, #0\n\t"
|
||||||
"subs r5, r5, r11\n\t"
|
"subs r5, r5, r10\n\t"
|
||||||
"sbcs r6, r6, #0\n\t"
|
"sbcs r6, r6, #0\n\t"
|
||||||
"sbcs r8, r8, #0\n\t"
|
"sbcs r8, r8, #0\n\t"
|
||||||
"sbcs r9, r9, r11\n\t"
|
"sbcs r9, r9, r10\n\t"
|
||||||
"sbc r14, r14, r12\n\t"
|
"sbc r14, r14, r12\n\t"
|
||||||
/* Store result */
|
/* Store result */
|
||||||
"str r2, [%[r], #0]\n\t"
|
"str r2, [%[r], #0]\n\t"
|
||||||
|
@ -8158,6 +8158,18 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit*
|
|||||||
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
||||||
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
||||||
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
||||||
|
o = t[7] >> 32; t[7] &= 0xffffffff;
|
||||||
|
t[0] += o;
|
||||||
|
t[3] -= o;
|
||||||
|
t[6] -= o;
|
||||||
|
t[7] += o;
|
||||||
|
t[1] += t[0] >> 32; t[0] &= 0xffffffff;
|
||||||
|
t[2] += t[1] >> 32; t[1] &= 0xffffffff;
|
||||||
|
t[3] += t[2] >> 32; t[2] &= 0xffffffff;
|
||||||
|
t[4] += t[3] >> 32; t[3] &= 0xffffffff;
|
||||||
|
t[5] += t[4] >> 32; t[4] &= 0xffffffff;
|
||||||
|
t[6] += t[5] >> 32; t[5] &= 0xffffffff;
|
||||||
|
t[7] += t[6] >> 32; t[6] &= 0xffffffff;
|
||||||
r[0] = (t[1] << 32) | t[0];
|
r[0] = (t[1] << 32) | t[0];
|
||||||
r[1] = (t[3] << 32) | t[2];
|
r[1] = (t[3] << 32) | t[2];
|
||||||
r[2] = (t[5] << 32) | t[4];
|
r[2] = (t[5] << 32) | t[4];
|
||||||
|
Reference in New Issue
Block a user