forked from wolfSSL/wolfssl
SP ASM: fixes for Wycheproof tests
ARM64 ASM: Fix P256 Montgomery Reduce. Fix div to handle large dividend word.
This commit is contained in:
@@ -4253,9 +4253,13 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
r1 = sp_2048_cmp_32(&t1[32], d) >= 0;
|
||||
sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
|
||||
for (i = 31; i >= 0; i--) {
|
||||
sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
|
||||
sp_digit mask = 0 - (t1[32 + i] == div);
|
||||
sp_digit hi = t1[32 + i] + mask;
|
||||
r1 = div_2048_word_32(hi, t1[32 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_32(t2, d, r1);
|
||||
t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
|
||||
@@ -5786,6 +5790,13 @@ static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
for (i = 63; i > 0; i--) {
|
||||
if (t1[i + 64] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 64] >= d[i]) {
|
||||
sp_2048_sub_in_place_64(&t1[64], d);
|
||||
}
|
||||
for (i = 63; i >= 0; i--) {
|
||||
if (t1[64 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -6637,9 +6648,13 @@ static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
r1 = sp_2048_cmp_64(&t1[64], d) >= 0;
|
||||
sp_2048_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
|
||||
for (i = 63; i >= 0; i--) {
|
||||
sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
|
||||
sp_digit mask = 0 - (t1[64 + i] == div);
|
||||
sp_digit hi = t1[64 + i] + mask;
|
||||
r1 = div_2048_word_64(hi, t1[64 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_64(t2, d, r1);
|
||||
t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
|
||||
@@ -14312,9 +14327,13 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[47];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
|
||||
r1 = sp_3072_cmp_48(&t1[48], d) >= 0;
|
||||
sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1);
|
||||
for (i = 47; i >= 0; i--) {
|
||||
sp_digit hi = t1[48 + i] - (t1[48 + i] == div);
|
||||
sp_digit mask = 0 - (t1[48 + i] == div);
|
||||
sp_digit hi = t1[48 + i] + mask;
|
||||
r1 = div_3072_word_48(hi, t1[48 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_48(t2, d, r1);
|
||||
t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
|
||||
@@ -16301,6 +16320,13 @@ static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[95];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
|
||||
for (i = 95; i > 0; i--) {
|
||||
if (t1[i + 96] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 96] >= d[i]) {
|
||||
sp_3072_sub_in_place_96(&t1[96], d);
|
||||
}
|
||||
for (i = 95; i >= 0; i--) {
|
||||
if (t1[96 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -17504,9 +17530,13 @@ static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[95];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
|
||||
r1 = sp_3072_cmp_96(&t1[96], d) >= 0;
|
||||
sp_3072_cond_sub_96(&t1[96], &t1[96], d, (sp_digit)0 - r1);
|
||||
for (i = 95; i >= 0; i--) {
|
||||
sp_digit hi = t1[96 + i] - (t1[96 + i] == div);
|
||||
sp_digit mask = 0 - (t1[96 + i] == div);
|
||||
sp_digit hi = t1[96 + i] + mask;
|
||||
r1 = div_3072_word_96(hi, t1[96 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_96(t2, d, r1);
|
||||
t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
|
||||
@@ -23356,6 +23386,13 @@ static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d,
|
||||
|
||||
div = d[127];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
|
||||
for (i = 127; i > 0; i--) {
|
||||
if (t1[i + 128] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 128] >= d[i]) {
|
||||
sp_4096_sub_in_place_128(&t1[128], d);
|
||||
}
|
||||
for (i = 127; i >= 0; i--) {
|
||||
if (t1[128 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -24911,9 +24948,13 @@ static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_di
|
||||
|
||||
div = d[127];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
|
||||
r1 = sp_4096_cmp_128(&t1[128], d) >= 0;
|
||||
sp_4096_cond_sub_128(&t1[128], &t1[128], d, (sp_digit)0 - r1);
|
||||
for (i = 127; i >= 0; i--) {
|
||||
sp_digit hi = t1[128 + i] - (t1[128 + i] == div);
|
||||
sp_digit mask = 0 - (t1[128 + i] == div);
|
||||
sp_digit hi = t1[128 + i] + mask;
|
||||
r1 = div_4096_word_128(hi, t1[128 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_4096_mul_d_128(t2, d, r1);
|
||||
t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
|
||||
@@ -34575,9 +34616,13 @@ static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit
|
||||
|
||||
div = d[7];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
|
||||
r1 = sp_256_cmp_8(&t1[8], d) >= 0;
|
||||
sp_256_cond_sub_8(&t1[8], &t1[8], d, (sp_digit)0 - r1);
|
||||
for (i = 7; i >= 0; i--) {
|
||||
sp_digit hi = t1[8 + i] - (t1[8 + i] == div);
|
||||
sp_digit mask = 0 - (t1[8 + i] == div);
|
||||
sp_digit hi = t1[8 + i] + mask;
|
||||
r1 = div_256_word_8(hi, t1[8 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_256_mul_d_8(t2, d, r1);
|
||||
t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
|
||||
@@ -43678,9 +43723,13 @@ static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digi
|
||||
|
||||
div = d[11];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
|
||||
r1 = sp_384_cmp_12(&t1[12], d) >= 0;
|
||||
sp_384_cond_sub_12(&t1[12], &t1[12], d, (sp_digit)0 - r1);
|
||||
for (i = 11; i >= 0; i--) {
|
||||
sp_digit hi = t1[12 + i] - (t1[12 + i] == div);
|
||||
sp_digit mask = 0 - (t1[12 + i] == div);
|
||||
sp_digit hi = t1[12 + i] + mask;
|
||||
r1 = div_384_word_12(hi, t1[12 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_384_mul_d_12(t2, d, r1);
|
||||
t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
|
||||
@@ -62498,9 +62547,13 @@ static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
r1 = sp_1024_cmp_32(&t1[32], d) >= 0;
|
||||
sp_1024_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
|
||||
for (i = 31; i >= 0; i--) {
|
||||
sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
|
||||
sp_digit mask = 0 - (t1[32 + i] == div);
|
||||
sp_digit hi = t1[32 + i] + mask;
|
||||
r1 = div_1024_word_32(hi, t1[32 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_1024_mul_d_32(t2, d, r1);
|
||||
t1[32 + i] += sp_1024_sub_in_place_32(&t1[i], t2);
|
||||
|
@@ -3934,9 +3934,13 @@ static WC_INLINE int sp_2048_div_16(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[15];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 16);
|
||||
r1 = sp_2048_cmp_16(&t1[16], d) >= 0;
|
||||
sp_2048_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1);
|
||||
for (i = 15; i >= 0; i--) {
|
||||
sp_digit hi = t1[16 + i] - (t1[16 + i] == div);
|
||||
sp_digit mask = 0 - (t1[16 + i] == div);
|
||||
sp_digit hi = t1[16 + i] + mask;
|
||||
r1 = div_2048_word_16(hi, t1[16 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_16(t2, d, r1);
|
||||
t1[16 + i] += sp_2048_sub_in_place_16(&t1[i], t2);
|
||||
@@ -4970,6 +4974,13 @@ static WC_INLINE int sp_2048_div_32_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
for (i = 31; i > 0; i--) {
|
||||
if (t1[i + 32] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 32] >= d[i]) {
|
||||
sp_2048_sub_in_place_32(&t1[32], d);
|
||||
}
|
||||
for (i = 31; i >= 0; i--) {
|
||||
if (t1[32 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -5530,9 +5541,13 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
r1 = sp_2048_cmp_32(&t1[32], d) >= 0;
|
||||
sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
|
||||
for (i = 31; i >= 0; i--) {
|
||||
sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
|
||||
sp_digit mask = 0 - (t1[32 + i] == div);
|
||||
sp_digit hi = t1[32 + i] + mask;
|
||||
r1 = div_2048_word_32(hi, t1[32 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_32(t2, d, r1);
|
||||
t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
|
||||
@@ -13071,9 +13086,13 @@ static WC_INLINE int sp_3072_div_24(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[23];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 24);
|
||||
r1 = sp_3072_cmp_24(&t1[24], d) >= 0;
|
||||
sp_3072_cond_sub_24(&t1[24], &t1[24], d, (sp_digit)0 - r1);
|
||||
for (i = 23; i >= 0; i--) {
|
||||
sp_digit hi = t1[24 + i] - (t1[24 + i] == div);
|
||||
sp_digit mask = 0 - (t1[24 + i] == div);
|
||||
sp_digit hi = t1[24 + i] + mask;
|
||||
r1 = div_3072_word_24(hi, t1[24 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_24(t2, d, r1);
|
||||
t1[24 + i] += sp_3072_sub_in_place_24(&t1[i], t2);
|
||||
@@ -14347,6 +14366,13 @@ static WC_INLINE int sp_3072_div_48_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[47];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
|
||||
for (i = 47; i > 0; i--) {
|
||||
if (t1[i + 48] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 48] >= d[i]) {
|
||||
sp_3072_sub_in_place_48(&t1[48], d);
|
||||
}
|
||||
for (i = 47; i >= 0; i--) {
|
||||
if (t1[48 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -15059,9 +15085,13 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[47];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
|
||||
r1 = sp_3072_cmp_48(&t1[48], d) >= 0;
|
||||
sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1);
|
||||
for (i = 47; i >= 0; i--) {
|
||||
sp_digit hi = t1[48 + i] - (t1[48 + i] == div);
|
||||
sp_digit mask = 0 - (t1[48 + i] == div);
|
||||
sp_digit hi = t1[48 + i] + mask;
|
||||
r1 = div_3072_word_48(hi, t1[48 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_48(t2, d, r1);
|
||||
t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
|
||||
@@ -19256,6 +19286,13 @@ static WC_INLINE int sp_4096_div_64_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
for (i = 63; i > 0; i--) {
|
||||
if (t1[i + 64] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 64] >= d[i]) {
|
||||
sp_4096_sub_in_place_64(&t1[64], d);
|
||||
}
|
||||
for (i = 63; i >= 0; i--) {
|
||||
if (t1[64 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -20120,9 +20157,13 @@ static WC_INLINE int sp_4096_div_64(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
r1 = sp_4096_cmp_64(&t1[64], d) >= 0;
|
||||
sp_4096_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
|
||||
for (i = 63; i >= 0; i--) {
|
||||
sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
|
||||
sp_digit mask = 0 - (t1[64 + i] == div);
|
||||
sp_digit hi = t1[64 + i] + mask;
|
||||
r1 = div_4096_word_64(hi, t1[64 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_4096_mul_d_64(t2, d, r1);
|
||||
t1[64 + i] += sp_4096_sub_in_place_64(&t1[i], t2);
|
||||
@@ -22388,11 +22429,13 @@ SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const
|
||||
"adcs x15, x15, xzr\n\t"
|
||||
"extr x6, x6, x5, 32\n\t"
|
||||
"adc x8, x8, xzr\n\t"
|
||||
"adds x11, x11, x6\n\t"
|
||||
"extr x5, x5, x4, 32\n\t"
|
||||
"lsl x4, x4, 32\n\t"
|
||||
"adds x9, x9, x4\n\t"
|
||||
"adcs x10, x10, x5\n\t"
|
||||
"adcs x11, x11, x6\n\t"
|
||||
"adcs x12, x12, x7\n\t"
|
||||
"adcs x13, x13, x16\n\t"
|
||||
"lsl x4, x4, 32\n\t"
|
||||
"adcs x14, x14, xzr\n\t"
|
||||
"adcs x15, x15, xzr\n\t"
|
||||
"adc x8, x8, xzr\n\t"
|
||||
@@ -22400,12 +22443,11 @@ SP_NOINLINE static void sp_256_mont_mul_4(sp_digit* r, const sp_digit* a, const
|
||||
"subs x11, x11, x4\n\t"
|
||||
"sbcs x12, x12, x5\n\t"
|
||||
"sbcs x13, x13, x6\n\t"
|
||||
"sub x8, xzr, x8\n\t"
|
||||
"sbcs x14, x14, x7\n\t"
|
||||
"sub x8, x8, #1\n\t"
|
||||
"sbcs x15, x15, x16\n\t"
|
||||
"mov x19, 0xffffffff00000001\n\t"
|
||||
"adc x8, x8, xzr\n\t"
|
||||
"sbc x8, x8, xzr\n\t"
|
||||
"neg x8, x8\n\t"
|
||||
"# mask m and sub from result if overflow\n\t"
|
||||
"# m[0] = -1 & mask = mask\n\t"
|
||||
"subs x12, x12, x8\n\t"
|
||||
@@ -22535,11 +22577,13 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const
|
||||
"adcs x15, x15, xzr\n\t"
|
||||
"extr x5, x5, x4, 32\n\t"
|
||||
"adc x8, x8, xzr\n\t"
|
||||
"adds x11, x11, x5\n\t"
|
||||
"extr x4, x4, x3, 32\n\t"
|
||||
"lsl x3, x3, 32\n\t"
|
||||
"adds x9, x9, x3\n\t"
|
||||
"adcs x10, x10, x4\n\t"
|
||||
"adcs x11, x11, x5\n\t"
|
||||
"adcs x12, x12, x6\n\t"
|
||||
"adcs x13, x13, x7\n\t"
|
||||
"lsl x3, x3, 32\n\t"
|
||||
"adcs x14, x14, xzr\n\t"
|
||||
"adcs x15, x15, xzr\n\t"
|
||||
"adc x8, x8, xzr\n\t"
|
||||
@@ -22547,12 +22591,11 @@ SP_NOINLINE static void sp_256_mont_sqr_4(sp_digit* r, const sp_digit* a, const
|
||||
"subs x11, x11, x3\n\t"
|
||||
"sbcs x12, x12, x4\n\t"
|
||||
"sbcs x13, x13, x5\n\t"
|
||||
"sub x8, xzr, x8\n\t"
|
||||
"sbcs x14, x14, x6\n\t"
|
||||
"sub x8, x8, #1\n\t"
|
||||
"sbcs x15, x15, x7\n\t"
|
||||
"mov x17, 0xffffffff00000001\n\t"
|
||||
"adc x8, x8, xzr\n\t"
|
||||
"sbc x8, x8, xzr\n\t"
|
||||
"neg x8, x8\n\t"
|
||||
"# mask m and sub from result if overflow\n\t"
|
||||
"# m[0] = -1 & mask = mask\n\t"
|
||||
"subs x12, x12, x8\n\t"
|
||||
@@ -22839,11 +22882,13 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
|
||||
"adcs x17, x17, xzr\n\t"
|
||||
"extr x5, x5, x4, 32\n\t"
|
||||
"adc x10, x10, xzr\n\t"
|
||||
"adds x13, x13, x5\n\t"
|
||||
"extr x4, x4, x3, 32\n\t"
|
||||
"lsl x3, x3, 32\n\t"
|
||||
"adds x11, x11, x3\n\t"
|
||||
"adcs x12, x12, x4\n\t"
|
||||
"adcs x13, x13, x5\n\t"
|
||||
"adcs x14, x14, x6\n\t"
|
||||
"adcs x15, x15, x7\n\t"
|
||||
"lsl x3, x3, 32\n\t"
|
||||
"adcs x16, x16, xzr\n\t"
|
||||
"adcs x17, x17, xzr\n\t"
|
||||
"adc x10, x10, xzr\n\t"
|
||||
@@ -22851,12 +22896,11 @@ SP_NOINLINE static void sp_256_mont_reduce_4(sp_digit* a, const sp_digit* m,
|
||||
"subs x13, x13, x3\n\t"
|
||||
"sbcs x14, x14, x4\n\t"
|
||||
"sbcs x15, x15, x5\n\t"
|
||||
"sub x10, xzr, x10\n\t"
|
||||
"sbcs x16, x16, x6\n\t"
|
||||
"sub x10, x10, #1\n\t"
|
||||
"sbcs x17, x17, x7\n\t"
|
||||
"mov x9, 0xffffffff00000001\n\t"
|
||||
"adc x10, x10, xzr\n\t"
|
||||
"sbc x10, x10, xzr\n\t"
|
||||
"neg x10, x10\n\t"
|
||||
"# mask m and sub from result if overflow\n\t"
|
||||
"# m[0] = -1 & mask = mask\n\t"
|
||||
"subs x14, x14, x10\n\t"
|
||||
@@ -39854,9 +39898,13 @@ static WC_INLINE int sp_256_div_4(const sp_digit* a, const sp_digit* d, sp_digit
|
||||
|
||||
div = d[3];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 4);
|
||||
r1 = sp_256_cmp_4(&t1[4], d) >= 0;
|
||||
sp_256_cond_sub_4(&t1[4], &t1[4], d, (sp_digit)0 - r1);
|
||||
for (i = 3; i >= 0; i--) {
|
||||
sp_digit hi = t1[4 + i] - (t1[4 + i] == div);
|
||||
sp_digit mask = 0 - (t1[4 + i] == div);
|
||||
sp_digit hi = t1[4 + i] + mask;
|
||||
r1 = div_256_word_4(hi, t1[4 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_256_mul_d_4(t2, d, r1);
|
||||
t1[4 + i] += sp_256_sub_in_place_4(&t1[i], t2);
|
||||
@@ -65802,9 +65850,13 @@ static WC_INLINE int sp_384_div_6(const sp_digit* a, const sp_digit* d, sp_digit
|
||||
|
||||
div = d[5];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 6);
|
||||
r1 = sp_384_cmp_6(&t1[6], d) >= 0;
|
||||
sp_384_cond_sub_6(&t1[6], &t1[6], d, (sp_digit)0 - r1);
|
||||
for (i = 5; i >= 0; i--) {
|
||||
sp_digit hi = t1[6 + i] - (t1[6 + i] == div);
|
||||
sp_digit mask = 0 - (t1[6 + i] == div);
|
||||
sp_digit hi = t1[6 + i] + mask;
|
||||
r1 = div_384_word_6(hi, t1[6 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_384_mul_d_6(t2, d, r1);
|
||||
t1[6 + i] += sp_384_sub_in_place_6(&t1[i], t2);
|
||||
@@ -113907,9 +113959,13 @@ static WC_INLINE int sp_1024_div_16(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[15];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 16);
|
||||
r1 = sp_1024_cmp_16(&t1[16], d) >= 0;
|
||||
sp_1024_cond_sub_16(&t1[16], &t1[16], d, (sp_digit)0 - r1);
|
||||
for (i = 15; i >= 0; i--) {
|
||||
sp_digit hi = t1[16 + i] - (t1[16 + i] == div);
|
||||
sp_digit mask = 0 - (t1[16 + i] == div);
|
||||
sp_digit hi = t1[16 + i] + mask;
|
||||
r1 = div_1024_word_16(hi, t1[16 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_1024_mul_d_16(t2, d, r1);
|
||||
t1[16 + i] += sp_1024_sub_in_place_16(&t1[i], t2);
|
||||
|
@@ -23937,9 +23937,13 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
r1 = sp_2048_cmp_32(&t1[32], d) >= 0;
|
||||
sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
|
||||
for (i = 31; i >= 0; i--) {
|
||||
sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
|
||||
sp_digit mask = 0 - (t1[32 + i] == div);
|
||||
sp_digit hi = t1[32 + i] + mask;
|
||||
r1 = div_2048_word_32(hi, t1[32 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_32(t2, d, r1);
|
||||
t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
|
||||
@@ -27307,6 +27311,13 @@ static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
for (i = 63; i > 0; i--) {
|
||||
if (t1[i + 64] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 64] >= d[i]) {
|
||||
sp_2048_sub_in_place_64(&t1[64], d);
|
||||
}
|
||||
for (i = 63; i >= 0; i--) {
|
||||
if (t1[64 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -27513,9 +27524,13 @@ static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
r1 = sp_2048_cmp_64(&t1[64], d) >= 0;
|
||||
sp_2048_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
|
||||
for (i = 63; i >= 0; i--) {
|
||||
sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
|
||||
sp_digit mask = 0 - (t1[64 + i] == div);
|
||||
sp_digit hi = t1[64 + i] + mask;
|
||||
r1 = div_2048_word_64(hi, t1[64 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_64(t2, d, r1);
|
||||
t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
|
||||
@@ -75379,9 +75394,13 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[47];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
|
||||
r1 = sp_3072_cmp_48(&t1[48], d) >= 0;
|
||||
sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1);
|
||||
for (i = 47; i >= 0; i--) {
|
||||
sp_digit hi = t1[48 + i] - (t1[48 + i] == div);
|
||||
sp_digit mask = 0 - (t1[48 + i] == div);
|
||||
sp_digit hi = t1[48 + i] + mask;
|
||||
r1 = div_3072_word_48(hi, t1[48 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_48(t2, d, r1);
|
||||
t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
|
||||
@@ -79575,6 +79594,13 @@ static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[95];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
|
||||
for (i = 95; i > 0; i--) {
|
||||
if (t1[i + 96] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 96] >= d[i]) {
|
||||
sp_3072_sub_in_place_96(&t1[96], d);
|
||||
}
|
||||
for (i = 95; i >= 0; i--) {
|
||||
if (t1[96 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -79786,9 +79812,13 @@ static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[95];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
|
||||
r1 = sp_3072_cmp_96(&t1[96], d) >= 0;
|
||||
sp_3072_cond_sub_96(&t1[96], &t1[96], d, (sp_digit)0 - r1);
|
||||
for (i = 95; i >= 0; i--) {
|
||||
sp_digit hi = t1[96 + i] - (t1[96 + i] == div);
|
||||
sp_digit mask = 0 - (t1[96 + i] == div);
|
||||
sp_digit hi = t1[96 + i] + mask;
|
||||
r1 = div_3072_word_96(hi, t1[96 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_96(t2, d, r1);
|
||||
t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
|
||||
@@ -92108,6 +92138,13 @@ static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d,
|
||||
|
||||
div = d[127];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
|
||||
for (i = 127; i > 0; i--) {
|
||||
if (t1[i + 128] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 128] >= d[i]) {
|
||||
sp_4096_sub_in_place_128(&t1[128], d);
|
||||
}
|
||||
for (i = 127; i >= 0; i--) {
|
||||
if (t1[128 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -92320,9 +92357,13 @@ static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_di
|
||||
|
||||
div = d[127];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
|
||||
r1 = sp_4096_cmp_128(&t1[128], d) >= 0;
|
||||
sp_4096_cond_sub_128(&t1[128], &t1[128], d, (sp_digit)0 - r1);
|
||||
for (i = 127; i >= 0; i--) {
|
||||
sp_digit hi = t1[128 + i] - (t1[128 + i] == div);
|
||||
sp_digit mask = 0 - (t1[128 + i] == div);
|
||||
sp_digit hi = t1[128 + i] + mask;
|
||||
r1 = div_4096_word_128(hi, t1[128 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_4096_mul_d_128(t2, d, r1);
|
||||
t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
|
||||
@@ -105211,9 +105252,13 @@ static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit
|
||||
|
||||
div = d[7];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
|
||||
r1 = sp_256_cmp_8(&t1[8], d) >= 0;
|
||||
sp_256_cond_sub_8(&t1[8], &t1[8], d, (sp_digit)0 - r1);
|
||||
for (i = 7; i >= 0; i--) {
|
||||
sp_digit hi = t1[8 + i] - (t1[8 + i] == div);
|
||||
sp_digit mask = 0 - (t1[8 + i] == div);
|
||||
sp_digit hi = t1[8 + i] + mask;
|
||||
r1 = div_256_word_8(hi, t1[8 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_256_mul_d_8(t2, d, r1);
|
||||
t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
|
||||
@@ -115844,9 +115889,13 @@ static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digi
|
||||
|
||||
div = d[11];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
|
||||
r1 = sp_384_cmp_12(&t1[12], d) >= 0;
|
||||
sp_384_cond_sub_12(&t1[12], &t1[12], d, (sp_digit)0 - r1);
|
||||
for (i = 11; i >= 0; i--) {
|
||||
sp_digit hi = t1[12 + i] - (t1[12 + i] == div);
|
||||
sp_digit mask = 0 - (t1[12 + i] == div);
|
||||
sp_digit hi = t1[12 + i] + mask;
|
||||
r1 = div_384_word_12(hi, t1[12 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_384_mul_d_12(t2, d, r1);
|
||||
t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
|
||||
@@ -201817,9 +201866,13 @@ static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
r1 = sp_1024_cmp_32(&t1[32], d) >= 0;
|
||||
sp_1024_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
|
||||
for (i = 31; i >= 0; i--) {
|
||||
sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
|
||||
sp_digit mask = 0 - (t1[32 + i] == div);
|
||||
sp_digit hi = t1[32 + i] + mask;
|
||||
r1 = div_1024_word_32(hi, t1[32 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_1024_mul_d_32(t2, d, r1);
|
||||
t1[32 + i] += sp_1024_sub_in_place_32(&t1[i], t2);
|
||||
|
@@ -3372,9 +3372,13 @@ static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
r1 = sp_2048_cmp_32(&t1[32], d) >= 0;
|
||||
sp_2048_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
|
||||
for (i = 31; i >= 0; i--) {
|
||||
sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
|
||||
sp_digit mask = 0 - (t1[32 + i] == div);
|
||||
sp_digit hi = t1[32 + i] + mask;
|
||||
r1 = div_2048_word_32(hi, t1[32 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_32(t2, d, r1);
|
||||
t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
|
||||
@@ -4215,6 +4219,13 @@ static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
for (i = 63; i > 0; i--) {
|
||||
if (t1[i + 64] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 64] >= d[i]) {
|
||||
sp_2048_sub_in_place_64(&t1[64], d);
|
||||
}
|
||||
for (i = 63; i >= 0; i--) {
|
||||
if (t1[64 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -4357,9 +4368,13 @@ static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[63];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
|
||||
r1 = sp_2048_cmp_64(&t1[64], d) >= 0;
|
||||
sp_2048_cond_sub_64(&t1[64], &t1[64], d, (sp_digit)0 - r1);
|
||||
for (i = 63; i >= 0; i--) {
|
||||
sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
|
||||
sp_digit mask = 0 - (t1[64 + i] == div);
|
||||
sp_digit hi = t1[64 + i] + mask;
|
||||
r1 = div_2048_word_64(hi, t1[64 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_2048_mul_d_64(t2, d, r1);
|
||||
t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
|
||||
@@ -8884,9 +8899,13 @@ static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[47];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
|
||||
r1 = sp_3072_cmp_48(&t1[48], d) >= 0;
|
||||
sp_3072_cond_sub_48(&t1[48], &t1[48], d, (sp_digit)0 - r1);
|
||||
for (i = 47; i >= 0; i--) {
|
||||
sp_digit hi = t1[48 + i] - (t1[48 + i] == div);
|
||||
sp_digit mask = 0 - (t1[48 + i] == div);
|
||||
sp_digit hi = t1[48 + i] + mask;
|
||||
r1 = div_3072_word_48(hi, t1[48 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_48(t2, d, r1);
|
||||
t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
|
||||
@@ -9809,6 +9828,13 @@ static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, s
|
||||
|
||||
div = d[95];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
|
||||
for (i = 95; i > 0; i--) {
|
||||
if (t1[i + 96] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 96] >= d[i]) {
|
||||
sp_3072_sub_in_place_96(&t1[96], d);
|
||||
}
|
||||
for (i = 95; i >= 0; i--) {
|
||||
if (t1[96 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -9953,9 +9979,13 @@ static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[95];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
|
||||
r1 = sp_3072_cmp_96(&t1[96], d) >= 0;
|
||||
sp_3072_cond_sub_96(&t1[96], &t1[96], d, (sp_digit)0 - r1);
|
||||
for (i = 95; i >= 0; i--) {
|
||||
sp_digit hi = t1[96 + i] - (t1[96 + i] == div);
|
||||
sp_digit mask = 0 - (t1[96 + i] == div);
|
||||
sp_digit hi = t1[96 + i] + mask;
|
||||
r1 = div_3072_word_96(hi, t1[96 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_3072_mul_d_96(t2, d, r1);
|
||||
t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
|
||||
@@ -13586,6 +13616,13 @@ static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d,
|
||||
|
||||
div = d[127];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
|
||||
for (i = 127; i > 0; i--) {
|
||||
if (t1[i + 128] != d[i])
|
||||
break;
|
||||
}
|
||||
if (t1[i + 128] >= d[i]) {
|
||||
sp_4096_sub_in_place_128(&t1[128], d);
|
||||
}
|
||||
for (i = 127; i >= 0; i--) {
|
||||
if (t1[128 + i] == div) {
|
||||
r1 = SP_DIGIT_MAX;
|
||||
@@ -13730,9 +13767,13 @@ static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_di
|
||||
|
||||
div = d[127];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
|
||||
r1 = sp_4096_cmp_128(&t1[128], d) >= 0;
|
||||
sp_4096_cond_sub_128(&t1[128], &t1[128], d, (sp_digit)0 - r1);
|
||||
for (i = 127; i >= 0; i--) {
|
||||
sp_digit hi = t1[128 + i] - (t1[128 + i] == div);
|
||||
sp_digit mask = 0 - (t1[128 + i] == div);
|
||||
sp_digit hi = t1[128 + i] + mask;
|
||||
r1 = div_4096_word_128(hi, t1[128 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_4096_mul_d_128(t2, d, r1);
|
||||
t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
|
||||
@@ -23094,9 +23135,13 @@ static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit
|
||||
|
||||
div = d[7];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
|
||||
r1 = sp_256_cmp_8(&t1[8], d) >= 0;
|
||||
sp_256_cond_sub_8(&t1[8], &t1[8], d, (sp_digit)0 - r1);
|
||||
for (i = 7; i >= 0; i--) {
|
||||
sp_digit hi = t1[8 + i] - (t1[8 + i] == div);
|
||||
sp_digit mask = 0 - (t1[8 + i] == div);
|
||||
sp_digit hi = t1[8 + i] + mask;
|
||||
r1 = div_256_word_8(hi, t1[8 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_256_mul_d_8(t2, d, r1);
|
||||
t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
|
||||
@@ -30283,9 +30328,13 @@ static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digi
|
||||
|
||||
div = d[11];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
|
||||
r1 = sp_384_cmp_12(&t1[12], d) >= 0;
|
||||
sp_384_cond_sub_12(&t1[12], &t1[12], d, (sp_digit)0 - r1);
|
||||
for (i = 11; i >= 0; i--) {
|
||||
sp_digit hi = t1[12 + i] - (t1[12 + i] == div);
|
||||
sp_digit mask = 0 - (t1[12 + i] == div);
|
||||
sp_digit hi = t1[12 + i] + mask;
|
||||
r1 = div_384_word_12(hi, t1[12 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_384_mul_d_12(t2, d, r1);
|
||||
t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
|
||||
@@ -42243,9 +42292,13 @@ static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_dig
|
||||
|
||||
div = d[31];
|
||||
XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
|
||||
r1 = sp_1024_cmp_32(&t1[32], d) >= 0;
|
||||
sp_1024_cond_sub_32(&t1[32], &t1[32], d, (sp_digit)0 - r1);
|
||||
for (i = 31; i >= 0; i--) {
|
||||
sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
|
||||
sp_digit mask = 0 - (t1[32 + i] == div);
|
||||
sp_digit hi = t1[32 + i] + mask;
|
||||
r1 = div_1024_word_32(hi, t1[32 + i - 1], div);
|
||||
r1 |= mask;
|
||||
|
||||
sp_1024_mul_d_32(t2, d, r1);
|
||||
t1[32 + i] += sp_1024_sub_in_place_32(&t1[i], t2);
|
||||
|
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
|
||||
#ifdef WOLFSSL_USER_SETTINGS
|
||||
#include "wolfssl/wolfcrypt/settings.h"
|
||||
#include "wolfssl/wolfcrypt/settings.h"
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_INTEL_AVX1
|
||||
|
Reference in New Issue
Block a user