From 902087df6f0596bdabd0e8000733503ea183175b Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Thu, 18 Jul 2024 07:21:57 -0600 Subject: [PATCH 1/6] add w64wrapper support in poly1305 --- wolfcrypt/src/misc.c | 53 ++++++++++++++++++++++- wolfcrypt/src/poly1305.c | 84 ++++++++++++++++++++++++++++++++++++ wolfssl/wolfcrypt/misc.h | 2 + wolfssl/wolfcrypt/poly1305.h | 2 +- 4 files changed, 138 insertions(+), 3 deletions(-) diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index 10f733bd0..c08dd3057 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -796,6 +796,13 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftLeft(w64wrapper a, int shift) return a; } +WC_MISC_STATIC WC_INLINE w64wrapper w64Mul(unsigned int a, unsigned int b) +{ + w64wrapper ret; + ret.n = (word64)a * (word64)b; + return ret; +} + #else WC_MISC_STATIC WC_INLINE void w64Increment(w64wrapper *n) @@ -841,6 +848,24 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap) return a; } +WC_MISC_STATIC WC_INLINE w64wrapper w64Add(w64wrapper a, w64wrapper b, + byte *wrap) +{ + a.n[1] = a.n[1] + b.n[1]; + if (a.n[1] < b.n[1]) { + a.n[0]++; + if (wrap != NULL && a.n[0] == 0) + *wrap = 1; + } + + a.n[0] = a.n[0] + b.n[0]; + if (a.n[0] < b.n[0]) { + *wrap = 1; + } + + return a; +} + WC_MISC_STATIC WC_INLINE w64wrapper w64Sub32(w64wrapper a, word32 b, byte *wrap) { byte _underflow = 0; @@ -939,7 +964,7 @@ WC_MISC_STATIC WC_INLINE byte w64LT(w64wrapper a, w64wrapper b) WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftRight(w64wrapper a, int shift) { if (shift < 32) { - a.n[1] = (a.n[1] >> shift) || (a.n[0] << (32 - shift)); + a.n[1] = (a.n[1] >> shift) | (a.n[0] << (32 - shift)); a.n[0] >>= shift; } else { @@ -951,7 +976,7 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftRight(w64wrapper a, int shift) WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftLeft(w64wrapper a, int shift) { if (shift < 32) { - a.n[0] = (a.n[0] << shift) || (a.n[1] >> (32 - shift)); + a.n[0] = (a.n[0] << 
shift) | (a.n[1] >> (32 - shift)); a.n[1] <<= shift; } else { @@ -961,6 +986,30 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftLeft(w64wrapper a, int shift) return a; } +WC_MISC_STATIC WC_INLINE w64wrapper w64Mul(word32 a, word32 b) +{ + w64wrapper ret; + word16 ltlA, ltlB, ltlC, ltlD; + word32 bigA, bigB, bigC, bigD; + + ltlA = a & 0xFFFF; + ltlB = (a >> 16) & 0xFFFF; + ltlC = b & 0xFFFF; + ltlD = (b >> 16) & 0xFFFF; + + bigA = ltlA * ltlC; + bigC = ltlB * ltlC; + bigD = ltlA * ltlD; + bigB = ltlB * ltlD; + + ret = w64From32(0, bigB); + ret = w64ShiftLeft(ret, 16); + ret = w64Add32(ret, bigD, NULL); + ret = w64Add32(ret, bigC, NULL); + ret = w64ShiftLeft(ret, 16); + return w64Add32(ret, bigA, NULL); +} + #endif /* WORD64_AVAILABLE && !WOLFSSL_W64_WRAPPER_TEST */ #endif /* WOLFSSL_W64_WRAPPER */ diff --git a/wolfcrypt/src/poly1305.c b/wolfcrypt/src/poly1305.c index cde754752..7fd57c42d 100644 --- a/wolfcrypt/src/poly1305.c +++ b/wolfcrypt/src/poly1305.c @@ -29,6 +29,13 @@ and Daniel J. Bernstein */ +/* + * WOLFSSL_W64_WRAPPER Uses wrappers around word64 types for a system that does + * not have word64 available. As expected it reduces + * performance. Benchmarks collected July 2024 show + * 303.004 MiB/s with and 1874.194 MiB/s without. 
+ */ + #ifdef HAVE_CONFIG_H #include <config.h> #endif @@ -332,7 +339,11 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, word32 r0,r1,r2,r3,r4; word32 s1,s2,s3,s4; word32 h0,h1,h2,h3,h4; +#ifdef WOLFSSL_W64_WRAPPER + w64wrapper d0,d1,d2,d3,d4; +#else word64 d0,d1,d2,d3,d4; +#endif word32 c; @@ -362,6 +373,41 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, h4 += (U8TO32(m+12) >> 8) | hibit; /* h *= r */ +#ifdef WOLFSSL_W64_WRAPPER + { + w64wrapper tmp; + + d0 = w64Mul(h0, r0); tmp = w64Mul(h1, s4); + d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h2, s3); + d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h3, s2); + d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h4, s1); + d0 = w64Add(d0, tmp, NULL); + + d1 = w64Mul(h0, r1); tmp = w64Mul(h1, r0); + d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h2, s4); + d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h3, s3); + d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h4, s2); + d1 = w64Add(d1, tmp, NULL); + + d2 = w64Mul(h0, r2); tmp = w64Mul(h1, r1); + d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h2, r0); + d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h3, s4); + d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h4, s3); + d2 = w64Add(d2, tmp, NULL); + + d3 = w64Mul(h0, r3); tmp = w64Mul(h1, r2); + d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h2, r1); + d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h3, r0); + d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h4, s4); + d3 = w64Add(d3, tmp, NULL); + + d4 = w64Mul(h0, r4); tmp = w64Mul(h1, r3); + d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h2, r2); + d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h3, r1); + d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h4, r0); + d4 = w64Add(d4, tmp, NULL); + } +#else d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) + ((word64)h3 * s2) + ((word64)h4 * s1); d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) + @@ -372,13 +418,26 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, ((word64)h3 * r0) + ((word64)h4 * s4); d4 = ((word64)h0 * r4) + ((word64)h1 *
r3) + ((word64)h2 * r2) + ((word64)h3 * r1) + ((word64)h4 * r0); +#endif /* (partial) h %= p */ +#ifdef WOLFSSL_W64_WRAPPER + c = w64GetLow32(w64ShiftRight(d0, 26));h0 = w64GetLow32(d0) & 0x3ffffff; + d1 = w64Add32(d1, c, NULL); + c = w64GetLow32(w64ShiftRight(d1, 26));h1 = w64GetLow32(d1) & 0x3ffffff; + d2 = w64Add32(d2, c, NULL); + c = w64GetLow32(w64ShiftRight(d2, 26));h2 = w64GetLow32(d2) & 0x3ffffff; + d3 = w64Add32(d3, c, NULL); + c = w64GetLow32(w64ShiftRight(d3, 26));h3 = w64GetLow32(d3) & 0x3ffffff; + d4 = w64Add32(d4, c, NULL); + c = w64GetLow32(w64ShiftRight(d4, 26));h4 = w64GetLow32(d4) & 0x3ffffff; +#else c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff; d1 += c; c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff; d2 += c; c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff; d3 += c; c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff; d4 += c; c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff; +#endif h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; h1 += c; @@ -517,7 +576,11 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac) word32 h0,h1,h2,h3,h4,c; word32 g0,g1,g2,g3,g4; +#ifdef WOLFSSL_W64_WRAPPER + w64wrapper f; +#else word64 f; +#endif word32 mask; #endif @@ -656,10 +719,31 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac) h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; /* mac = (h + pad) % (2^128) */ +#ifdef WOLFSSL_W64_WRAPPER + w64SetLow32(&f, h0); + f = w64Add32(f, ctx->pad[0], NULL); + h0 = w64GetLow32(f); + + f = w64ShiftRight(f, 32); + f = w64Add32(f, h1, NULL); + f = w64Add32(f, ctx->pad[1], NULL); + h1 = w64GetLow32(f); + + f = w64ShiftRight(f, 32); + f = w64Add32(f, h2, NULL); + f = w64Add32(f, ctx->pad[2], NULL); + h2 = w64GetLow32(f); + + f = w64ShiftRight(f, 32); + f = w64Add32(f, h3, NULL); + f = w64Add32(f, ctx->pad[3], NULL); + h3 = w64GetLow32(f); +#else f = (word64)h0 + ctx->pad[0] ; h0 = (word32)f; f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f; f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f; f = 
(word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f; +#endif U32TO8(mac + 0, h0); U32TO8(mac + 4, h1); diff --git a/wolfssl/wolfcrypt/misc.h b/wolfssl/wolfcrypt/misc.h index 9761d686a..823d0f088 100644 --- a/wolfssl/wolfcrypt/misc.h +++ b/wolfssl/wolfcrypt/misc.h @@ -145,6 +145,7 @@ WOLFSSL_LOCAL word32 w64GetLow32(w64wrapper n); WOLFSSL_LOCAL word32 w64GetHigh32(w64wrapper n); WOLFSSL_LOCAL void w64SetLow32(w64wrapper *n, word32 low); WOLFSSL_LOCAL w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap); +WOLFSSL_LOCAL w64wrapper w64Add(w64wrapper a, w64wrapper b, byte *wrap); WOLFSSL_LOCAL w64wrapper w64Sub32(w64wrapper a, word32 b, byte *wrap); WOLFSSL_LOCAL byte w64GT(w64wrapper a, w64wrapper b); WOLFSSL_LOCAL byte w64IsZero(w64wrapper a); @@ -157,6 +158,7 @@ WOLFSSL_LOCAL w64wrapper w64Sub(w64wrapper a, w64wrapper b); WOLFSSL_LOCAL void w64Zero(w64wrapper *a); WOLFSSL_LOCAL w64wrapper w64ShiftRight(w64wrapper a, int shift); WOLFSSL_LOCAL w64wrapper w64ShiftLeft(w64wrapper a, int shift); +WOLFSSL_LOCAL w64wrapper w64Mul(word32 a, word32 b); #else /* !NO_INLINE */ diff --git a/wolfssl/wolfcrypt/poly1305.h b/wolfssl/wolfcrypt/poly1305.h index 00232ae78..94b5a28e1 100644 --- a/wolfssl/wolfcrypt/poly1305.h +++ b/wolfssl/wolfcrypt/poly1305.h @@ -57,7 +57,7 @@ #if defined(USE_INTEL_POLY1305_SPEEDUP) #elif (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \ - defined(WC_HAS_GCC_4_4_64BIT)) + defined(WC_HAS_GCC_4_4_64BIT)) && !defined(WOLFSSL_W64_WRAPPER_TEST) #define POLY130564 #else #define POLY130532 From 04ab561a65fa02a5303d9119be54cf3ebfe02abc Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Thu, 18 Jul 2024 07:30:08 -0600 Subject: [PATCH 2/6] add smallstack support for poly1305 w64wrapper --- wolfcrypt/src/poly1305.c | 91 ++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 36 deletions(-) diff --git a/wolfcrypt/src/poly1305.c b/wolfcrypt/src/poly1305.c index 7fd57c42d..ec39484c4 100644 --- a/wolfcrypt/src/poly1305.c 
+++ b/wolfcrypt/src/poly1305.c @@ -339,12 +339,22 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, word32 r0,r1,r2,r3,r4; word32 s1,s2,s3,s4; word32 h0,h1,h2,h3,h4; + word32 c; #ifdef WOLFSSL_W64_WRAPPER - w64wrapper d0,d1,d2,d3,d4; + #ifdef WOLFSSL_SMALL_STACK + w64wrapper* d; + + d = (w64wrapper*)XMALLOC(5 * sizeof(w64wrapper), NULL, + DYNAMIC_TYPE_TMP_BUFFER); + if (d == NULL) { + return MEMORY_E; + } + #else + w64wrapper d[5]; + #endif #else word64 d0,d1,d2,d3,d4; #endif - word32 c; r0 = ctx->r[0]; @@ -377,35 +387,35 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, { w64wrapper tmp; - d0 = w64Mul(h0, r0); tmp = w64Mul(h1, s4); - d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h2, s3); - d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h3, s2); - d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h4, s1); - d0 = w64Add(d0, tmp, NULL); + d[0] = w64Mul(h0, r0); tmp = w64Mul(h1, s4); + d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h2, s3); + d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h3, s2); + d[0] = w64Add(d[0], tmp, NULL); tmp = w64Mul(h4, s1); + d[0] = w64Add(d[0], tmp, NULL); - d1 = w64Mul(h0, r1); tmp = w64Mul(h1, r0); - d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h2, s4); - d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h3, s3); - d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h4, s2); - d1 = w64Add(d1, tmp, NULL); + d[1] = w64Mul(h0, r1); tmp = w64Mul(h1, r0); + d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h2, s4); + d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h3, s3); + d[1] = w64Add(d[1], tmp, NULL); tmp = w64Mul(h4, s2); + d[1] = w64Add(d[1], tmp, NULL); - d2 = w64Mul(h0, r2); tmp = w64Mul(h1, r1); - d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h2, r0); - d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h3, s4); - d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h4, s3); - d2 = w64Add(d2, tmp, NULL); + d[2] = w64Mul(h0, r2); tmp = w64Mul(h1, r1); + d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h2, r0); + d[2] = w64Add(d[2], tmp, NULL); tmp = w64Mul(h3, s4); + d[2] = 
w64Add(d[2], tmp, NULL); tmp = w64Mul(h4, s3); + d[2] = w64Add(d[2], tmp, NULL); - d3 = w64Mul(h0, r3); tmp = w64Mul(h1, r2); - d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h2, r1); - d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h3, r0); - d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h4, s4); - d3 = w64Add(d3, tmp, NULL); + d[3] = w64Mul(h0, r3); tmp = w64Mul(h1, r2); + d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h2, r1); + d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h3, r0); + d[3] = w64Add(d[3], tmp, NULL); tmp = w64Mul(h4, s4); + d[3] = w64Add(d[3], tmp, NULL); - d4 = w64Mul(h0, r4); tmp = w64Mul(h1, r3); - d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h2, r2); - d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h3, r1); - d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h4, r0); - d4 = w64Add(d4, tmp, NULL); + d[4] = w64Mul(h0, r4); tmp = w64Mul(h1, r3); + d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h2, r2); + d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h3, r1); + d[4] = w64Add(d[4], tmp, NULL); tmp = w64Mul(h4, r0); + d[4] = w64Add(d[4], tmp, NULL); } #else d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) + @@ -422,15 +432,20 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, /* (partial) h %= p */ #ifdef WOLFSSL_W64_WRAPPER - c = w64GetLow32(w64ShiftRight(d0, 26));h0 = w64GetLow32(d0) & 0x3ffffff; - d1 = w64Add32(d1, c, NULL); - c = w64GetLow32(w64ShiftRight(d1, 26));h1 = w64GetLow32(d1) & 0x3ffffff; - d2 = w64Add32(d2, c, NULL); - c = w64GetLow32(w64ShiftRight(d2, 26));h2 = w64GetLow32(d2) & 0x3ffffff; - d3 = w64Add32(d3, c, NULL); - c = w64GetLow32(w64ShiftRight(d3, 26));h3 = w64GetLow32(d3) & 0x3ffffff; - d4 = w64Add32(d4, c, NULL); - c = w64GetLow32(w64ShiftRight(d4, 26));h4 = w64GetLow32(d4) & 0x3ffffff; + c = w64GetLow32(w64ShiftRight(d[0], 26)); + h0 = w64GetLow32(d[0]) & 0x3ffffff; + d[1] = w64Add32(d[1], c, NULL); + c = w64GetLow32(w64ShiftRight(d[1], 26)); + h1 = w64GetLow32(d[1]) & 0x3ffffff; + d[2] = w64Add32(d[2], c, NULL); + c = 
w64GetLow32(w64ShiftRight(d[2], 26)); + h2 = w64GetLow32(d[2]) & 0x3ffffff; + d[3] = w64Add32(d[3], c, NULL); + c = w64GetLow32(w64ShiftRight(d[3], 26)); + h3 = w64GetLow32(d[3]) & 0x3ffffff; + d[4] = w64Add32(d[4], c, NULL); + c = w64GetLow32(w64ShiftRight(d[4], 26)); + h4 = w64GetLow32(d[4]) & 0x3ffffff; #else c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff; d1 += c; c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff; @@ -451,6 +466,10 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m, ctx->h[3] = h3; ctx->h[4] = h4; +#if defined(WOLFSSL_W64_WRAPPER) && defined(WOLFSSL_SMALL_STACK) + XFREE(d, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return 0; #endif /* end of 64 bit cpu blocks or 32 bit cpu */ From 8a9c893c6f82db596a0fb8af668cddfa1da0e69f Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Fri, 19 Jul 2024 11:03:44 -0600 Subject: [PATCH 3/6] fix for initialization of high value and function signature --- wolfcrypt/src/misc.c | 8 ++++---- wolfcrypt/src/poly1305.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index c08dd3057..a25de2d21 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -796,7 +796,7 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftLeft(w64wrapper a, int shift) return a; } -WC_MISC_STATIC WC_INLINE w64wrapper w64Mul(unsigned int a, unsigned int b) +WC_MISC_STATIC WC_INLINE w64wrapper w64Mul(word32 a, word32 b) { w64wrapper ret; ret.n = (word64)a * (word64)b; @@ -838,7 +838,7 @@ WC_MISC_STATIC WC_INLINE void w64SetLow32(w64wrapper *n, word32 low) WC_MISC_STATIC WC_INLINE w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap) { - a.n[1] = a.n[1] + b; + a.n[1] += b; if (a.n[1] < b) { a.n[0]++; if (wrap != NULL && a.n[0] == 0) @@ -851,14 +851,14 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap) WC_MISC_STATIC WC_INLINE w64wrapper w64Add(w64wrapper a, w64wrapper b, byte *wrap) { - a.n[1] = a.n[1] + b.n[1]; + a.n[1] +=
b.n[1]; if (a.n[1] < b.n[1]) { a.n[0]++; if (wrap != NULL && a.n[0] == 0) *wrap = 1; } - a.n[0] = a.n[0] + b.n[0]; + a.n[0] += b.n[0]; if (a.n[0] < b.n[0]) { *wrap = 1; } diff --git a/wolfcrypt/src/poly1305.c b/wolfcrypt/src/poly1305.c index ec39484c4..c77bbca7d 100644 --- a/wolfcrypt/src/poly1305.c +++ b/wolfcrypt/src/poly1305.c @@ -739,7 +739,7 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac) /* mac = (h + pad) % (2^128) */ #ifdef WOLFSSL_W64_WRAPPER - w64SetLow32(&f, h0); + f = w64From32(0, h0); f = w64Add32(f, ctx->pad[0], NULL); h0 = w64GetLow32(f); From f5ed2460df1bc9d9a679f5b28d86d597a090aed3 Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Fri, 19 Jul 2024 13:59:05 -0600 Subject: [PATCH 4/6] cast to larger type for multiplication --- wolfcrypt/src/misc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index a25de2d21..7f8f3f7d7 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -997,10 +997,10 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64Mul(word32 a, word32 b) ltlC = b & 0xFFFF; ltlD = (b >> 16) & 0xFFFF; - bigA = ltlA * ltlC; - bigC = ltlB * ltlC; - bigD = ltlA * ltlD; - bigB = ltlB * ltlD; + bigA = (word32)ltlA * (word32)ltlC; + bigC = (word32)ltlB * (word32)ltlC; + bigD = (word32)ltlA * (word32)ltlD; + bigB = (word32)ltlB * (word32)ltlD; ret = w64From32(0, bigB); ret = w64ShiftLeft(ret, 16); From cc2ed4a75b4b0db5cfbc0f7896afea94a3c7bf24 Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Mon, 5 Aug 2024 16:47:35 -0600 Subject: [PATCH 5/6] add w64Add for build with word64 --- wolfcrypt/src/misc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index 7f8f3f7d7..a87909080 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -716,6 +716,16 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap) return a; } +WC_MISC_STATIC WC_INLINE w64wrapper w64Add(w64wrapper 
a, w64wrapper b, + byte *wrap) +{ + a.n = a.n + b.n; + if (a.n < b.n && wrap != NULL) + *wrap = 1; + + return a; +} + WC_MISC_STATIC WC_INLINE w64wrapper w64Sub32(w64wrapper a, word32 b, byte *wrap) { if (a.n < b && wrap != NULL) @@ -919,7 +929,7 @@ WC_MISC_STATIC WC_INLINE byte w64IsZero(w64wrapper a) return a.n[0] == 0 && a.n[1] == 0; } -WC_MISC_STATIC WC_INLINE void c64toa(w64wrapper *a, byte *out) +WC_MISC_STATIC WC_INLINE void c64toa(const w64wrapper *a, byte *out) { #ifdef BIG_ENDIAN_ORDER word32 *_out = (word32*)(out); From f1ace6236391c3fbbc384512efd61bf558aac1f6 Mon Sep 17 00:00:00 2001 From: JacobBarthelmeh Date: Tue, 6 Aug 2024 09:12:17 -0600 Subject: [PATCH 6/6] add null sanity check and adjust add --- wolfcrypt/src/misc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wolfcrypt/src/misc.c b/wolfcrypt/src/misc.c index a87909080..163ec1154 100644 --- a/wolfcrypt/src/misc.c +++ b/wolfcrypt/src/misc.c @@ -709,7 +709,7 @@ WC_MISC_STATIC WC_INLINE void w64SetLow32(w64wrapper *n, word32 low) { WC_MISC_STATIC WC_INLINE w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap) { - a.n = a.n + b; + a.n += b; if (a.n < b && wrap != NULL) *wrap = 1; @@ -719,7 +719,7 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap) WC_MISC_STATIC WC_INLINE w64wrapper w64Add(w64wrapper a, w64wrapper b, byte *wrap) { - a.n = a.n + b.n; + a.n += b.n; if (a.n < b.n && wrap != NULL) *wrap = 1; @@ -869,7 +869,7 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64Add(w64wrapper a, w64wrapper b, } a.n[0] += b.n[0]; - if (a.n[0] < b.n[0]) { + if (wrap != NULL && a.n[0] < b.n[0]) { *wrap = 1; }