Fix the div_word implementation that does not use the divide operation.
Fix ARM32 and Cortex-M builds so that 4096-bit operations work again.
This commit is contained in:
Sean Parkinson
2022-05-13 09:37:24 +10:00
parent 6aaee73585
commit e8160f049e
4 changed files with 210 additions and 368 deletions

View File

@@ -20943,7 +20943,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
u += sp_4096_add_128(r + 64, r + 64, z1); u += sp_4096_add_128(r + 64, r + 64, z1);
XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (64 - 1)); XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (64 - 1));
a1[0] = u; a1[0] = u;
(void)sp_4096_add_64(r + 192, r + 192, a1); (void)sp_2048_add_64(r + 192, r + 192, a1);
} }
/* Square a and put result in r. (r = a * a) /* Square a and put result in r. (r = a * a)

View File

@@ -1937,7 +1937,7 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) { for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28); t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -1951,11 +1951,6 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58); r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -3004,7 +2999,7 @@ static WC_INLINE sp_digit sp_2048_div_word_72(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) { for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28); t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -3018,11 +3013,6 @@ static WC_INLINE sp_digit sp_2048_div_word_72(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58); r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -5629,7 +5619,7 @@ static WC_INLINE sp_digit sp_3072_div_word_53(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) { for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28); t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -5643,11 +5633,6 @@ static WC_INLINE sp_digit sp_3072_div_word_53(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58); r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -6473,7 +6458,7 @@ static WC_INLINE sp_digit sp_3072_div_word_106(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) { for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28); t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -6487,11 +6472,6 @@ static WC_INLINE sp_digit sp_3072_div_word_106(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58); r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -9662,7 +9642,7 @@ static WC_INLINE sp_digit sp_3072_div_word_56(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 26; i >= 1; i--) { for (i = 26; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 27); t1 += t1 + (((sp_uint32)t0 >> 27) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -9676,11 +9656,6 @@ static WC_INLINE sp_digit sp_3072_div_word_56(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56); r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 28);
m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -10585,7 +10560,7 @@ static WC_INLINE sp_digit sp_3072_div_word_112(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 26; i >= 1; i--) { for (i = 26; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 27); t1 += t1 + (((sp_uint32)t0 >> 27) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -10599,11 +10574,6 @@ static WC_INLINE sp_digit sp_3072_div_word_112(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56); r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 28);
m = d - ((sp_int64)r * div);
r += (m >> 56) - (sp_digit)(d >> 56);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -13292,7 +13262,7 @@ static WC_INLINE sp_digit sp_4096_div_word_71(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) { for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28); t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -13306,11 +13276,6 @@ static WC_INLINE sp_digit sp_4096_div_word_71(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58); r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -14137,7 +14102,7 @@ static WC_INLINE sp_digit sp_4096_div_word_142(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 27; i >= 1; i--) { for (i = 27; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 28); t1 += t1 + (((sp_uint32)t0 >> 28) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -14151,11 +14116,6 @@ static WC_INLINE sp_digit sp_4096_div_word_142(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58); r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 29);
m = d - ((sp_int64)r * div);
r += (m >> 58) - (sp_digit)(d >> 58);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -17205,7 +17165,7 @@ static WC_INLINE sp_digit sp_4096_div_word_81(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 24; i >= 1; i--) { for (i = 24; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 25); t1 += t1 + (((sp_uint32)t0 >> 25) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -17219,11 +17179,6 @@ static WC_INLINE sp_digit sp_4096_div_word_81(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52); r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 26);
m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -18114,7 +18069,7 @@ static WC_INLINE sp_digit sp_4096_div_word_162(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 24; i >= 1; i--) { for (i = 24; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 25); t1 += t1 + (((sp_uint32)t0 >> 25) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -18128,11 +18083,6 @@ static WC_INLINE sp_digit sp_4096_div_word_162(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52); r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 26);
m = d - ((sp_int64)r * div);
r += (m >> 52) - (sp_digit)(d >> 52);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;
@@ -41044,96 +40994,112 @@ SP_NOINLINE static void sp_521_mul_d_21(sp_digit* r, const sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
static WC_INLINE sp_digit sp_521_div_word_21(sp_digit d1, sp_digit d0, SP_NOINLINE static void sp_521_lshift_42(sp_digit* r, const sp_digit* a,
sp_digit div) byte n)
{ {
#ifdef SP_USE_DIVTI3 #ifdef WOLFSSL_SP_SMALL
sp_int64 d = ((sp_int64)d1 << 25) + d0;
return d / div;
#elif defined(__x86_64__) || defined(__i386__)
sp_int64 d = ((sp_int64)d1 << 25) + d0;
sp_uint32 lo = (sp_uint32)d;
sp_digit hi = (sp_digit)(d >> 32);
__asm__ __volatile__ (
"idiv %2"
: "+a" (lo)
: "d" (hi), "r" (div)
: "cc"
);
return (sp_digit)lo;
#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV)
sp_int64 d = ((sp_int64)d1 << 25) + d0;
sp_digit dv = (div >> 1) + 1;
sp_digit t1 = (sp_digit)(d >> 25);
sp_digit t0 = (sp_digit)(d & 0x1ffffff);
sp_digit t2;
sp_digit sign;
sp_digit r;
int i; int i;
sp_int64 m;
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r[42] = a[41] >> (25 - n);
t1 -= dv & (0 - r); for (i=41; i>0; i--) {
for (i = 23; i >= 1; i--) { r[i] = ((a[i] << n) | (a[i-1] >> (25 - n))) & 0x1ffffff;
t1 += t1 + ((sp_uint32)t0 >> 24);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2;
t1 -= dv & (0 - t2);
t1 += t2;
} }
r += r + 1;
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 25);
m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 25);
m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31);
r += sign * t2;
m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint32)(div - m)) >> 31);
r += sign * t2;
return r;
#else #else
sp_int64 d = ((sp_int64)d1 << 25) + d0; sp_int_digit s;
sp_digit r = 0; sp_int_digit t;
sp_digit t;
sp_digit dv = (div >> 10) + 1;
t = (sp_digit)(d >> 20); s = (sp_int_digit)a[41];
t = (t / dv) << 10; r[42] = s >> (25U - n);
r += t; s = (sp_int_digit)(a[41]); t = (sp_int_digit)(a[40]);
d -= (sp_int64)t * div; r[41] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
t = (sp_digit)(d >> 5); s = (sp_int_digit)(a[40]); t = (sp_int_digit)(a[39]);
t = t / (dv << 5); r[40] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
r += t; s = (sp_int_digit)(a[39]); t = (sp_int_digit)(a[38]);
d -= (sp_int64)t * div; r[39] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
t = (sp_digit)d; s = (sp_int_digit)(a[38]); t = (sp_int_digit)(a[37]);
t = t / div; r[38] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
r += t; s = (sp_int_digit)(a[37]); t = (sp_int_digit)(a[36]);
d -= (sp_int64)t * div; r[37] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
return r; s = (sp_int_digit)(a[36]); t = (sp_int_digit)(a[35]);
#endif r[36] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[35]); t = (sp_int_digit)(a[34]);
r[35] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[34]); t = (sp_int_digit)(a[33]);
r[34] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[33]); t = (sp_int_digit)(a[32]);
r[33] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[32]); t = (sp_int_digit)(a[31]);
r[32] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[31]); t = (sp_int_digit)(a[30]);
r[31] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[30]); t = (sp_int_digit)(a[29]);
r[30] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[29]); t = (sp_int_digit)(a[28]);
r[29] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[28]); t = (sp_int_digit)(a[27]);
r[28] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[27]); t = (sp_int_digit)(a[26]);
r[27] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[26]); t = (sp_int_digit)(a[25]);
r[26] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[25]); t = (sp_int_digit)(a[24]);
r[25] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[24]); t = (sp_int_digit)(a[23]);
r[24] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[23]); t = (sp_int_digit)(a[22]);
r[23] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[22]); t = (sp_int_digit)(a[21]);
r[22] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[21]); t = (sp_int_digit)(a[20]);
r[21] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[20]); t = (sp_int_digit)(a[19]);
r[20] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[19]); t = (sp_int_digit)(a[18]);
r[19] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[18]); t = (sp_int_digit)(a[17]);
r[18] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
r[17] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
r[16] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
r[15] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
r[14] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
r[13] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
r[12] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
r[11] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
r[10] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
r[9] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
r[8] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
r[7] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
r[6] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
r[5] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
r[4] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
r[3] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
r[2] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
r[1] = ((s << n) | (t >> (25U - n))) & 0x1ffffff;
#endif /* WOLFSSL_SP_SMALL */
r[0] = (a[0] << n) & 0x1ffffff;
} }
/* Divide d in a and put remainder into r (m*d + r = a) /* Divide d in a and put remainder into r (m*d + r = a)
* m is not calculated as it is not needed at this time. * m is not calculated as it is not needed at this time.
* *
* Large number of bits in last word. * Simplified based on top word of divisor being (1 << 25) - 1
* *
* a Number to be divided. * a Number to be divided.
* d Number to divide with. * d Number to divide with.
@@ -41145,60 +41111,49 @@ static int sp_521_div_21(const sp_digit* a, const sp_digit* d,
const sp_digit* m, sp_digit* r) const sp_digit* m, sp_digit* r)
{ {
int i; int i;
sp_digit dv;
sp_digit r1; sp_digit r1;
sp_digit mask;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* t1 = NULL; sp_digit* t1 = NULL;
#else #else
sp_digit t1[3 * 21 + 1]; sp_digit t1[4 * 21 + 3];
#endif #endif
sp_digit* t2 = NULL; sp_digit* t2 = NULL;
sp_digit* sd = NULL;
int err = MP_OKAY; int err = MP_OKAY;
(void)m; (void)m;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 21 + 1), NULL, t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 21 + 3), NULL,
DYNAMIC_TYPE_TMP_BUFFER); DYNAMIC_TYPE_TMP_BUFFER);
if (t1 == NULL) if (t1 == NULL)
err = MEMORY_E; err = MEMORY_E;
#endif #endif
(void)m;
if (err == MP_OKAY) { if (err == MP_OKAY) {
t2 = t1 + 2 * 21; t2 = t1 + 42 + 1;
sd = t2 + 21 + 1;
dv = d[20]; sp_521_mul_d_21(sd, d, (sp_digit)1 << 4);
XMEMCPY(t1, a, sizeof(*t1) * 2U * 21U); sp_521_lshift_42(t1, a, 4);
t1[21 + 21] += t1[21 + 21 - 1] >> 25;
t1[21 + 21 - 1] &= 0x1ffffff;
for (i=20; i>=0; i--) { for (i=20; i>=0; i--) {
t1[21 + i] += t1[21 + i - 1] >> 25; r1 = t1[21 + i];
t1[21 + i - 1] &= 0x1ffffff; sp_521_mul_d_21(t2, sd, r1);
r1 = sp_521_div_word_21(t1[21 + i], t1[21 + i - 1], dv);
sp_521_mul_d_21(t2, d, r1);
(void)sp_521_sub_21(&t1[i], &t1[i], t2); (void)sp_521_sub_21(&t1[i], &t1[i], t2);
sp_521_norm_21(&t1[i]);
t1[21 + i] -= t2[21]; t1[21 + i] -= t2[21];
t1[21 + i] += t1[21 + i - 1] >> 25; sp_521_norm_21(&t1[i + 1]);
t1[21 + i - 1] &= 0x1ffffff;
r1 = sp_521_div_word_21(-t1[21 + i], -t1[21 + i - 1], dv);
r1++;
sp_521_mul_d_21(t2, d, r1);
(void)sp_521_add_21(&t1[i], &t1[i], t2);
t1[21 + i] += t1[21 + i - 1] >> 25;
t1[21 + i - 1] &= 0x1ffffff;
}
t1[21 - 1] += t1[21 - 2] >> 25;
t1[21 - 2] &= 0x1ffffff;
r1 = t1[21 - 1] / dv;
sp_521_mul_d_21(t2, d, r1); mask = ~((t1[21 + i] - 1) >> 31);
(void)sp_521_sub_21(t1, t1, t2); sp_521_cond_sub_21(t1 + i, t1 + i, sd, mask);
XMEMCPY(r, t1, sizeof(*r) * 42U); sp_521_norm_21(&t1[i + 1]);
for (i=0; i<20; i++) {
r[i+1] += r[i] >> 25;
r[i] &= 0x1ffffff;
} }
sp_521_cond_add_21(r, r, d, r[20] >> 31); sp_521_norm_21(t1);
sp_521_rshift_21(r, t1, 4);
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
@@ -43741,7 +43696,7 @@ static WC_INLINE sp_digit sp_1024_div_word_42(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); r = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 23; i >= 1; i--) { for (i = 23; i >= 1; i--) {
t1 += t1 + ((sp_uint32)t0 >> 24); t1 += t1 + (((sp_uint32)t0 >> 24) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31); t2 = (sp_digit)(((sp_uint32)(dv - t1)) >> 31);
r += r + t2; r += r + t2;
@@ -43755,11 +43710,6 @@ static WC_INLINE sp_digit sp_1024_div_word_42(sp_digit d1, sp_digit d0,
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50); r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div);
r += (sp_digit)(m >> 25);
m = d - ((sp_int64)r * div);
r += (m >> 50) - (sp_digit)(d >> 50);
m = d - ((sp_int64)r * div); m = d - ((sp_int64)r * div);
sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint32)m >> 31)) * 2 + 1;
m *= sign; m *= sign;

View File

@@ -857,7 +857,7 @@ static WC_INLINE sp_digit sp_2048_div_word_17(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 59; i >= 1; i--) { for (i = 59; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 60); t1 += t1 + (((sp_uint64)t0 >> 60) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -871,11 +871,6 @@ static WC_INLINE sp_digit sp_2048_div_word_17(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122); r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 61);
m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -1687,7 +1682,7 @@ static WC_INLINE sp_digit sp_2048_div_word_34(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 59; i >= 1; i--) { for (i = 59; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 60); t1 += t1 + (((sp_uint64)t0 >> 60) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -1701,11 +1696,6 @@ static WC_INLINE sp_digit sp_2048_div_word_34(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122); r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 61);
m = d - ((sp_int128)r * div);
r += (m >> 122) - (sp_digit)(d >> 122);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -4391,7 +4381,7 @@ static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) { for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56); t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -4405,11 +4395,6 @@ static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114); r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -5282,7 +5267,7 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) { for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56); t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -5296,11 +5281,6 @@ static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114); r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -7740,7 +7720,7 @@ static WC_INLINE sp_digit sp_3072_div_word_26(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 58; i >= 1; i--) { for (i = 58; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 59); t1 += t1 + (((sp_uint64)t0 >> 59) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -7754,11 +7734,6 @@ static WC_INLINE sp_digit sp_3072_div_word_26(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120); r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 60);
m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -8576,7 +8551,7 @@ static WC_INLINE sp_digit sp_3072_div_word_52(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 58; i >= 1; i--) { for (i = 58; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 59); t1 += t1 + (((sp_uint64)t0 >> 59) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -8590,11 +8565,6 @@ static WC_INLINE sp_digit sp_3072_div_word_52(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120); r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 60);
m = d - ((sp_int128)r * div);
r += (m >> 120) - (sp_digit)(d >> 120);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -11418,7 +11388,7 @@ static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) { for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56); t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -11432,11 +11402,6 @@ static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114); r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -12320,7 +12285,7 @@ static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) { for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56); t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -12334,11 +12299,6 @@ static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114); r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -14820,7 +14780,7 @@ static WC_INLINE sp_digit sp_4096_div_word_35(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 57; i >= 1; i--) { for (i = 57; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 58); t1 += t1 + (((sp_uint64)t0 >> 58) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -14834,11 +14794,6 @@ static WC_INLINE sp_digit sp_4096_div_word_35(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118); r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 59);
m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -15651,7 +15606,7 @@ static WC_INLINE sp_digit sp_4096_div_word_70(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 57; i >= 1; i--) { for (i = 57; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 58); t1 += t1 + (((sp_uint64)t0 >> 58) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -15665,11 +15620,6 @@ static WC_INLINE sp_digit sp_4096_div_word_70(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118); r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 59);
m = d - ((sp_int128)r * div);
r += (m >> 118) - (sp_digit)(d >> 118);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -18548,7 +18498,7 @@ static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 51; i >= 1; i--) { for (i = 51; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 52); t1 += t1 + (((sp_uint64)t0 >> 52) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -18562,11 +18512,6 @@ static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106); r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 53);
m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -19451,7 +19396,7 @@ static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 51; i >= 1; i--) { for (i = 51; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 52); t1 += t1 + (((sp_uint64)t0 >> 52) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -19465,11 +19410,6 @@ static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106); r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 53);
m = d - ((sp_int128)r * div);
r += (m >> 106) - (sp_digit)(d >> 106);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;
@@ -40705,96 +40645,64 @@ SP_NOINLINE static void sp_521_mul_d_9(sp_digit* r, const sp_digit* a,
#endif /* WOLFSSL_SP_SMALL */ #endif /* WOLFSSL_SP_SMALL */
} }
static WC_INLINE sp_digit sp_521_div_word_9(sp_digit d1, sp_digit d0, SP_NOINLINE static void sp_521_lshift_18(sp_digit* r, const sp_digit* a,
sp_digit div) byte n)
{ {
#ifdef SP_USE_DIVTI3 #ifdef WOLFSSL_SP_SMALL
sp_int128 d = ((sp_int128)d1 << 58) + d0;
return d / div;
#elif defined(__x86_64__) || defined(__i386__)
sp_int128 d = ((sp_int128)d1 << 58) + d0;
sp_uint64 lo = (sp_uint64)d;
sp_digit hi = (sp_digit)(d >> 64);
__asm__ __volatile__ (
"idiv %2"
: "+a" (lo)
: "d" (hi), "r" (div)
: "cc"
);
return (sp_digit)lo;
#elif !defined(__aarch64__) && !defined(SP_DIV_WORD_USE_DIV)
sp_int128 d = ((sp_int128)d1 << 58) + d0;
sp_digit dv = (div >> 1) + 1;
sp_digit t1 = (sp_digit)(d >> 58);
sp_digit t0 = (sp_digit)(d & 0x3ffffffffffffffL);
sp_digit t2;
sp_digit sign;
sp_digit r;
int i; int i;
sp_int128 m;
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r[18] = a[17] >> (58 - n);
t1 -= dv & (0 - r); for (i=17; i>0; i--) {
for (i = 56; i >= 1; i--) { r[i] = ((a[i] << n) | (a[i-1] >> (58 - n))) & 0x3ffffffffffffffL;
t1 += t1 + ((sp_uint64)t0 >> 57);
t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2;
t1 -= dv & (0 - t2);
t1 += t2;
} }
r += r + 1;
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 58);
m = d - ((sp_int128)r * div);
r += (m >> 116) - (sp_digit)(d >> 116);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 58);
m = d - ((sp_int128)r * div);
r += (m >> 116) - (sp_digit)(d >> 116);
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63);
r += sign * t2;
m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign;
t2 = (sp_digit)(((sp_uint64)(div - m)) >> 63);
r += sign * t2;
return r;
#else #else
sp_int128 d = ((sp_int128)d1 << 58) + d0; sp_int_digit s;
sp_digit r = 0; sp_int_digit t;
sp_digit t;
sp_digit dv = (div >> 27) + 1;
t = (sp_digit)(d >> 54); s = (sp_int_digit)a[17];
t = (t / dv) << 27; r[18] = s >> (58U - n);
r += t; s = (sp_int_digit)(a[17]); t = (sp_int_digit)(a[16]);
d -= (sp_int128)t * div; r[17] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
t = (sp_digit)(d >> 23); s = (sp_int_digit)(a[16]); t = (sp_int_digit)(a[15]);
t = t / (dv << 4); r[16] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
r += t; s = (sp_int_digit)(a[15]); t = (sp_int_digit)(a[14]);
d -= (sp_int128)t * div; r[15] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
t = (sp_digit)d; s = (sp_int_digit)(a[14]); t = (sp_int_digit)(a[13]);
t = t / div; r[14] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
r += t; s = (sp_int_digit)(a[13]); t = (sp_int_digit)(a[12]);
d -= (sp_int128)t * div; r[13] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
return r; s = (sp_int_digit)(a[12]); t = (sp_int_digit)(a[11]);
#endif r[12] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[11]); t = (sp_int_digit)(a[10]);
r[11] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[10]); t = (sp_int_digit)(a[9]);
r[10] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[9]); t = (sp_int_digit)(a[8]);
r[9] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[8]); t = (sp_int_digit)(a[7]);
r[8] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[7]); t = (sp_int_digit)(a[6]);
r[7] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[6]); t = (sp_int_digit)(a[5]);
r[6] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[5]); t = (sp_int_digit)(a[4]);
r[5] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[4]); t = (sp_int_digit)(a[3]);
r[4] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[3]); t = (sp_int_digit)(a[2]);
r[3] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[2]); t = (sp_int_digit)(a[1]);
r[2] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
s = (sp_int_digit)(a[1]); t = (sp_int_digit)(a[0]);
r[1] = ((s << n) | (t >> (58U - n))) & 0x3ffffffffffffffUL;
#endif /* WOLFSSL_SP_SMALL */
r[0] = (a[0] << n) & 0x3ffffffffffffffL;
} }
/* Divide d in a and put remainder into r (m*d + r = a) /* Divide d in a and put remainder into r (m*d + r = a)
* m is not calculated as it is not needed at this time. * m is not calculated as it is not needed at this time.
* *
* Large number of bits in last word. * Simplified based on top word of divisor being (1 << 58) - 1
* *
* a Number to be divided. * a Number to be divided.
* d Number to divide with. * d Number to divide with.
@@ -40806,60 +40714,49 @@ static int sp_521_div_9(const sp_digit* a, const sp_digit* d,
const sp_digit* m, sp_digit* r) const sp_digit* m, sp_digit* r)
{ {
int i; int i;
sp_digit dv;
sp_digit r1; sp_digit r1;
sp_digit mask;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit* t1 = NULL; sp_digit* t1 = NULL;
#else #else
sp_digit t1[3 * 9 + 1]; sp_digit t1[4 * 9 + 3];
#endif #endif
sp_digit* t2 = NULL; sp_digit* t2 = NULL;
sp_digit* sd = NULL;
int err = MP_OKAY; int err = MP_OKAY;
(void)m; (void)m;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 9 + 1), NULL, t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * (4 * 9 + 3), NULL,
DYNAMIC_TYPE_TMP_BUFFER); DYNAMIC_TYPE_TMP_BUFFER);
if (t1 == NULL) if (t1 == NULL)
err = MEMORY_E; err = MEMORY_E;
#endif #endif
(void)m;
if (err == MP_OKAY) { if (err == MP_OKAY) {
t2 = t1 + 2 * 9; t2 = t1 + 18 + 1;
sd = t2 + 9 + 1;
dv = d[8]; sp_521_mul_d_9(sd, d, (sp_digit)1 << 1);
XMEMCPY(t1, a, sizeof(*t1) * 2U * 9U); sp_521_lshift_18(t1, a, 1);
t1[9 + 9] += t1[9 + 9 - 1] >> 58;
t1[9 + 9 - 1] &= 0x3ffffffffffffffL;
for (i=8; i>=0; i--) { for (i=8; i>=0; i--) {
t1[9 + i] += t1[9 + i - 1] >> 58; r1 = t1[9 + i];
t1[9 + i - 1] &= 0x3ffffffffffffffL; sp_521_mul_d_9(t2, sd, r1);
r1 = sp_521_div_word_9(t1[9 + i], t1[9 + i - 1], dv);
sp_521_mul_d_9(t2, d, r1);
(void)sp_521_sub_9(&t1[i], &t1[i], t2); (void)sp_521_sub_9(&t1[i], &t1[i], t2);
sp_521_norm_9(&t1[i]);
t1[9 + i] -= t2[9]; t1[9 + i] -= t2[9];
t1[9 + i] += t1[9 + i - 1] >> 58; sp_521_norm_9(&t1[i + 1]);
t1[9 + i - 1] &= 0x3ffffffffffffffL;
r1 = sp_521_div_word_9(-t1[9 + i], -t1[9 + i - 1], dv);
r1++;
sp_521_mul_d_9(t2, d, r1);
(void)sp_521_add_9(&t1[i], &t1[i], t2);
t1[9 + i] += t1[9 + i - 1] >> 58;
t1[9 + i - 1] &= 0x3ffffffffffffffL;
}
t1[9 - 1] += t1[9 - 2] >> 58;
t1[9 - 2] &= 0x3ffffffffffffffL;
r1 = t1[9 - 1] / dv;
sp_521_mul_d_9(t2, d, r1); mask = ~((t1[9 + i] - 1) >> 63);
(void)sp_521_sub_9(t1, t1, t2); sp_521_cond_sub_9(t1 + i, t1 + i, sd, mask);
XMEMCPY(r, t1, sizeof(*r) * 18U); sp_521_norm_9(&t1[i + 1]);
for (i=0; i<8; i++) {
r[i+1] += r[i] >> 58;
r[i] &= 0x3ffffffffffffffL;
} }
sp_521_cond_add_9(r, r, d, r[8] >> 63); sp_521_norm_9(t1);
sp_521_rshift_9(r, t1, 1);
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
@@ -43261,7 +43158,7 @@ static WC_INLINE sp_digit sp_1024_div_word_18(sp_digit d1, sp_digit d0,
r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); r = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
t1 -= dv & (0 - r); t1 -= dv & (0 - r);
for (i = 55; i >= 1; i--) { for (i = 55; i >= 1; i--) {
t1 += t1 + ((sp_uint64)t0 >> 56); t1 += t1 + (((sp_uint64)t0 >> 56) & 1);
t0 <<= 1; t0 <<= 1;
t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63); t2 = (sp_digit)(((sp_uint64)(dv - t1)) >> 63);
r += r + t2; r += r + t2;
@@ -43275,11 +43172,6 @@ static WC_INLINE sp_digit sp_1024_div_word_18(sp_digit d1, sp_digit d0,
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114); r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div);
r += (sp_digit)(m >> 57);
m = d - ((sp_int128)r * div);
r += (m >> 114) - (sp_digit)(d >> 114);
m = d - ((sp_int128)r * div); m = d - ((sp_int128)r * div);
sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1; sign = (sp_digit)(0 - ((sp_uint64)m >> 63)) * 2 + 1;
m *= sign; m *= sign;

View File

@@ -12527,7 +12527,7 @@ SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
u += sp_4096_add_128(r + 64, r + 64, z1); u += sp_4096_add_128(r + 64, r + 64, z1);
XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (64 - 1)); XMEMSET(a1 + 1, 0, sizeof(sp_digit) * (64 - 1));
a1[0] = u; a1[0] = u;
(void)sp_4096_add_64(r + 192, r + 192, a1); (void)sp_2048_add_64(r + 192, r + 192, a1);
} }
/* Square a and put result in r. (r = a * a) /* Square a and put result in r. (r = a * a)