From e34027ec768eafe64815ae55584e5f5a5ce6e96d Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Tue, 24 Jan 2023 09:29:32 +1000 Subject: [PATCH] SP EC ASM: mod_mul_norm fix Handle corner case of overflow in last 32-bit word. --- wolfcrypt/src/sp_arm32.c | 27 ++++++++++++++++++++++++++- wolfcrypt/src/sp_arm64.c | 12 ++++++++++++ wolfcrypt/src/sp_armthumb.c | 12 ++++++++++++ wolfcrypt/src/sp_cortexm.c | 33 +++++++++++++++++++++++++++++---- wolfcrypt/src/sp_x86_64.c | 12 ++++++++++++ 5 files changed, 91 insertions(+), 5 deletions(-) diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 635c59031..b87095055 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -65130,7 +65130,9 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "adcs r6, r6, #0\n\t" "adcs r7, r7, #0\n\t" "adcs r8, r8, r10\n\t" - "adc lr, lr, r12\n\t" + "adcs lr, lr, r12\n\t" + "mov r9, #0\n\t" + "adc r9, r9, #0\n\t" /* Subtract overflow */ /* Add underflow - subtract neg underflow */ "subs r2, r2, r10\n\t" @@ -65140,6 +65142,29 @@ static int sp_256_mod_mul_norm_8(sp_digit* r_p, const sp_digit* a_p, const sp_di "sbcs r6, r6, #0\n\t" "sbcs r7, r7, #0\n\t" "sbcs r8, r8, r12\n\t" + "sbcs lr, lr, r10\n\t" + "mov r10, #0\n\t" + "sbc r10, r10, #0\n\t" + "neg r10, r10\n\t" + /* Add overflow */ + /* Subtract underflow - add neg underflow */ + "adds r2, r2, r9\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, r10\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, r10\n\t" + "adc lr, lr, r9\n\t" + /* Subtract overflow */ + /* Add underflow - subtract neg underflow */ + "subs r2, r2, r10\n\t" + "sbcs r3, r3, #0\n\t" + "sbcs r4, r4, #0\n\t" + "sbcs r5, r5, r9\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r7, r7, #0\n\t" + "sbcs r8, r8, r9\n\t" "sbc lr, lr, r10\n\t" /* Store result */ "stm %[r], {r2, r3, r4, r5, r6, r7, r8, lr}\n\t" diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 30f902790..b3536b71a 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -22172,6 +22172,18 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* t[5] += t[4] >> 32; t[4] &= 0xffffffff; t[6] += t[5] >> 32; t[5] &= 0xffffffff; t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; r[0] = (t[1] << 32) | t[0]; r[1] = (t[3] << 32) | t[2]; r[2] = (t[5] << 32) | t[4]; diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 7f47c7b02..863741371 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -97775,6 +97775,18 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* t[5] += t[4] >> 32; t[4] &= 0xffffffff; t[6] += t[5] >> 32; t[5] &= 0xffffffff; t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; r[0] = (sp_digit)t[0]; r[1] = (sp_digit)t[1]; r[2] = (sp_digit)t[2]; diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 42ecefbbf..2c16e7769 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -16944,23 +16944,48 @@ static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* "neg r12, r12\n\t" /* Add overflow */ /* Subtract underflow - add neg underflow */ - "adds r2, r2, r11\n\t" + "adds r2, r2, r11\n\t" + "adcs r3, r3, #0\n\t" + "adcs r4, r4, #0\n\t" + "adds r5, r5, r12\n\t" + "adcs r6, r6, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, r12\n\t" + "adcs r14, r14, r11\n\t" + "mov r10, #0\n\t" + "adc r10, r10, #0\n\t" + /* Subtract overflow */ + /* Add underflow - subtract neg underflow */ + "subs r2, r2, r12\n\t" + "sbcs r3, r3, #0\n\t" + "sbcs r4, r4, #0\n\t" + "subs r5, r5, r11\n\t" + "sbcs r6, r6, #0\n\t" + "sbcs r8, r8, #0\n\t" + "sbcs r9, r9, r11\n\t" + "sbcs r14, r14, r12\n\t" + "mov r12, #0\n\t" + "sbc r12, r12, #0\n\t" + "neg r12, r12\n\t" + /* Add overflow */ + /* Subtract underflow - add neg underflow */ + "adds r2, r2, r10\n\t" "adcs r3, r3, #0\n\t" "adcs r4, r4, #0\n\t" "adds r5, r5, r12\n\t" "adcs r6, r6, #0\n\t" "adcs r8, r8, #0\n\t" "adcs r9, r9, r12\n\t" - "adc r14, r14, r11\n\t" + "adc r14, r14, r10\n\t" /* Subtract overflow */ /* Add underflow - subtract neg underflow */ "subs r2, r2, r12\n\t" "sbcs r3, r3, #0\n\t" "sbcs r4, r4, #0\n\t" - "subs r5, r5, r11\n\t" + "subs r5, r5, r10\n\t" "sbcs r6, r6, #0\n\t" "sbcs r8, r8, #0\n\t" - "sbcs r9, r9, r11\n\t" + "sbcs r9, r9, r10\n\t" "sbc r14, r14, r12\n\t" /* Store result */ "str r2, [%[r], #0]\n\t" diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 6c61fbba4..3efdba2db 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -8158,6 +8158,18 @@ static int sp_256_mod_mul_norm_4(sp_digit* r, const sp_digit* a, const sp_digit* t[5] += t[4] >> 32; t[4] &= 0xffffffff; t[6] += t[5] >> 32; t[5] &= 0xffffffff; t[7] += t[6] >> 32; t[6] &= 0xffffffff; + o = t[7] >> 32; t[7] &= 0xffffffff; + t[0] += o; + t[3] -= o; + t[6] -= o; + t[7] += o; + t[1] += t[0] >> 32; t[0] &= 0xffffffff; + t[2] += t[1] >> 32; t[1] &= 0xffffffff; + t[3] += t[2] >> 32; t[2] &= 0xffffffff; + t[4] += t[3] >> 32; t[3] &= 0xffffffff; + t[5] += t[4] >> 32; t[4] &= 0xffffffff; + t[6] += t[5] >> 32; t[5] &= 0xffffffff; + t[7] += t[6] >> 32; t[6] &= 0xffffffff; r[0] = (t[1] << 32) | t[0]; r[1] = (t[3] << 32) | t[2]; r[2] = (t[5] << 32) | t[4];