forked from wolfSSL/wolfssl
add w64wrapper support in poly1305
This commit is contained in:
@ -796,6 +796,13 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftLeft(w64wrapper a, int shift)
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Multiply two 32-bit values and return the full 64-bit product.
 *
 * This is the variant used when a native word64 is available, so the wrapper
 * simply holds the word64 result.
 *
 * The parameters are word32 so that this definition agrees with the
 * w64Mul(word32, word32) prototype and with the non-word64 implementation.
 *
 * a  first 32-bit factor
 * b  second 32-bit factor
 * returns a * b, computed in 64 bits (cannot overflow)
 */
WC_MISC_STATIC WC_INLINE w64wrapper w64Mul(word32 a, word32 b)
{
    w64wrapper ret;

    /* widen both operands before multiplying so no bits are lost */
    ret.n = (word64)a * (word64)b;

    return ret;
}
|
||||
|
||||
#else
|
||||
|
||||
WC_MISC_STATIC WC_INLINE void w64Increment(w64wrapper *n)
|
||||
@ -841,6 +848,24 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap)
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Add two 64-bit values stored as pairs of 32-bit words.
 *
 * n[1] is the low word and n[0] the high word: a carry out of the n[1] sum is
 * propagated into n[0].
 *
 * a     first addend
 * b     second addend
 * wrap  optional; when not NULL it is set to 1 if the sum does not fit in
 *       64 bits. It is never cleared here, so the caller must initialise it.
 *       May be NULL when the caller does not need overflow reporting.
 * returns a + b (modulo 2^64)
 */
WC_MISC_STATIC WC_INLINE w64wrapper w64Add(w64wrapper a, w64wrapper b,
    byte *wrap)
{
    /* low words first; an unsigned sum smaller than an addend means carry */
    a.n[1] = a.n[1] + b.n[1];
    if (a.n[1] < b.n[1]) {
        a.n[0]++;
        /* the carry alone may already wrap the high word */
        if (wrap != NULL && a.n[0] == 0)
            *wrap = 1;
    }

    /* high words; guard the write since callers are allowed to pass NULL */
    a.n[0] = a.n[0] + b.n[0];
    if (wrap != NULL && a.n[0] < b.n[0]) {
        *wrap = 1;
    }

    return a;
}
|
||||
|
||||
WC_MISC_STATIC WC_INLINE w64wrapper w64Sub32(w64wrapper a, word32 b, byte *wrap)
|
||||
{
|
||||
byte _underflow = 0;
|
||||
@ -939,7 +964,7 @@ WC_MISC_STATIC WC_INLINE byte w64LT(w64wrapper a, w64wrapper b)
|
||||
WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftRight(w64wrapper a, int shift)
|
||||
{
|
||||
if (shift < 32) {
|
||||
a.n[1] = (a.n[1] >> shift) || (a.n[0] << (32 - shift));
|
||||
a.n[1] = (a.n[1] >> shift) | (a.n[0] << (32 - shift));
|
||||
a.n[0] >>= shift;
|
||||
}
|
||||
else {
|
||||
@ -951,7 +976,7 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftRight(w64wrapper a, int shift)
|
||||
WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftLeft(w64wrapper a, int shift)
|
||||
{
|
||||
if (shift < 32) {
|
||||
a.n[0] = (a.n[0] << shift) || (a.n[1] >> (32 - shift));
|
||||
a.n[0] = (a.n[0] << shift) | (a.n[1] >> (32 - shift));
|
||||
a.n[1] <<= shift;
|
||||
}
|
||||
else {
|
||||
@ -961,6 +986,30 @@ WC_MISC_STATIC WC_INLINE w64wrapper w64ShiftLeft(w64wrapper a, int shift)
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Multiply two 32-bit values into a full 64-bit product without using a
 * native 64-bit type.
 *
 * Each factor is split into 16-bit halves and the four partial products are
 * recombined as
 *     (aHi*bHi << 32) + ((aLo*bHi + aHi*bLo) << 16) + aLo*bLo
 * using the w64 shift/add helpers so carries reach the upper word.
 *
 * a  first 32-bit factor
 * b  second 32-bit factor
 * returns the 64-bit product a * b
 */
WC_MISC_STATIC WC_INLINE w64wrapper w64Mul(word32 a, word32 b)
{
    w64wrapper ret;
    word32 aLo, aHi, bLo, bHi;
    word32 loLo, hiLo, loHi, hiHi;

    /* Hold the 16-bit halves in word32. With word16 operands the products
     * below would be evaluated after integer promotion to (signed) int:
     * 0xFFFF * 0xFFFF overflows a 32-bit int, and is truncated where int is
     * only 16 bits wide. */
    aLo = a & 0xFFFF;
    aHi = (a >> 16) & 0xFFFF;
    bLo = b & 0xFFFF;
    bHi = (b >> 16) & 0xFFFF;

    /* each partial product is at most 0xFFFF * 0xFFFF and fits in 32 bits */
    loLo = aLo * bLo;
    hiLo = aHi * bLo;
    loHi = aLo * bHi;
    hiHi = aHi * bHi;

    /* w64From32 presumably takes (high, low) - confirm against its
     * definition; hiHi starts in the low position and is shifted up twice */
    ret = w64From32(0, hiHi);
    ret = w64ShiftLeft(ret, 16);
    /* the cross terms sit 16 bits above loLo; w64Add32 carries upward */
    ret = w64Add32(ret, loHi, NULL);
    ret = w64Add32(ret, hiLo, NULL);
    ret = w64ShiftLeft(ret, 16);
    return w64Add32(ret, loLo, NULL);
}
|
||||
|
||||
#endif /* WORD64_AVAILABLE && !WOLFSSL_W64_WRAPPER_TEST */
|
||||
#endif /* WOLFSSL_W64_WRAPPER */
|
||||
|
||||
|
@ -29,6 +29,13 @@ and Daniel J. Bernstein
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* WOLFSSL_W64_WRAPPER Uses wrappers around word64 types for a system that does
|
||||
* not have word64 available. As expected it reduces
|
||||
* performance. Benchmarks collected July 2024 show
|
||||
* 303.004 MiB/s with the wrapper and 1874.194 MiB/s
* without it.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
@ -332,7 +339,11 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m,
|
||||
word32 r0,r1,r2,r3,r4;
|
||||
word32 s1,s2,s3,s4;
|
||||
word32 h0,h1,h2,h3,h4;
|
||||
#ifdef WOLFSSL_W64_WRAPPER
|
||||
w64wrapper d0,d1,d2,d3,d4;
|
||||
#else
|
||||
word64 d0,d1,d2,d3,d4;
|
||||
#endif
|
||||
word32 c;
|
||||
|
||||
|
||||
@ -362,6 +373,41 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m,
|
||||
h4 += (U8TO32(m+12) >> 8) | hibit;
|
||||
|
||||
/* h *= r */
|
||||
#ifdef WOLFSSL_W64_WRAPPER
|
||||
{
|
||||
w64wrapper tmp;
|
||||
|
||||
d0 = w64Mul(h0, r0); tmp = w64Mul(h1, s4);
|
||||
d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h2, s3);
|
||||
d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h3, s2);
|
||||
d0 = w64Add(d0, tmp, NULL); tmp = w64Mul(h4, s1);
|
||||
d0 = w64Add(d0, tmp, NULL);
|
||||
|
||||
d1 = w64Mul(h0, r1); tmp = w64Mul(h1, r0);
|
||||
d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h2, s4);
|
||||
d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h3, s3);
|
||||
d1 = w64Add(d1, tmp, NULL); tmp = w64Mul(h4, s2);
|
||||
d1 = w64Add(d1, tmp, NULL);
|
||||
|
||||
d2 = w64Mul(h0, r2); tmp = w64Mul(h1, r1);
|
||||
d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h2, r0);
|
||||
d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h3, s4);
|
||||
d2 = w64Add(d2, tmp, NULL); tmp = w64Mul(h4, s3);
|
||||
d2 = w64Add(d2, tmp, NULL);
|
||||
|
||||
d3 = w64Mul(h0, r3); tmp = w64Mul(h1, r2);
|
||||
d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h2, r1);
|
||||
d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h3, r0);
|
||||
d3 = w64Add(d3, tmp, NULL); tmp = w64Mul(h4, s4);
|
||||
d3 = w64Add(d3, tmp, NULL);
|
||||
|
||||
d4 = w64Mul(h0, r4); tmp = w64Mul(h1, r3);
|
||||
d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h2, r2);
|
||||
d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h3, r1);
|
||||
d4 = w64Add(d4, tmp, NULL); tmp = w64Mul(h4, r0);
|
||||
d4 = w64Add(d4, tmp, NULL);
|
||||
}
|
||||
#else
|
||||
d0 = ((word64)h0 * r0) + ((word64)h1 * s4) + ((word64)h2 * s3) +
|
||||
((word64)h3 * s2) + ((word64)h4 * s1);
|
||||
d1 = ((word64)h0 * r1) + ((word64)h1 * r0) + ((word64)h2 * s4) +
|
||||
@ -372,13 +418,26 @@ static int poly1305_blocks(Poly1305* ctx, const unsigned char *m,
|
||||
((word64)h3 * r0) + ((word64)h4 * s4);
|
||||
d4 = ((word64)h0 * r4) + ((word64)h1 * r3) + ((word64)h2 * r2) +
|
||||
((word64)h3 * r1) + ((word64)h4 * r0);
|
||||
#endif
|
||||
|
||||
/* (partial) h %= p */
|
||||
#ifdef WOLFSSL_W64_WRAPPER
|
||||
c = w64GetLow32(w64ShiftRight(d0, 26));h0 = w64GetLow32(d0) & 0x3ffffff;
|
||||
d1 = w64Add32(d1, c, NULL);
|
||||
c = w64GetLow32(w64ShiftRight(d1, 26));h1 = w64GetLow32(d1) & 0x3ffffff;
|
||||
d2 = w64Add32(d2, c, NULL);
|
||||
c = w64GetLow32(w64ShiftRight(d2, 26));h2 = w64GetLow32(d2) & 0x3ffffff;
|
||||
d3 = w64Add32(d3, c, NULL);
|
||||
c = w64GetLow32(w64ShiftRight(d3, 26));h3 = w64GetLow32(d3) & 0x3ffffff;
|
||||
d4 = w64Add32(d4, c, NULL);
|
||||
c = w64GetLow32(w64ShiftRight(d4, 26));h4 = w64GetLow32(d4) & 0x3ffffff;
|
||||
#else
|
||||
c = (word32)(d0 >> 26); h0 = (word32)d0 & 0x3ffffff;
|
||||
d1 += c; c = (word32)(d1 >> 26); h1 = (word32)d1 & 0x3ffffff;
|
||||
d2 += c; c = (word32)(d2 >> 26); h2 = (word32)d2 & 0x3ffffff;
|
||||
d3 += c; c = (word32)(d3 >> 26); h3 = (word32)d3 & 0x3ffffff;
|
||||
d4 += c; c = (word32)(d4 >> 26); h4 = (word32)d4 & 0x3ffffff;
|
||||
#endif
|
||||
h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff;
|
||||
h1 += c;
|
||||
|
||||
@ -517,7 +576,11 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac)
|
||||
|
||||
word32 h0,h1,h2,h3,h4,c;
|
||||
word32 g0,g1,g2,g3,g4;
|
||||
#ifdef WOLFSSL_W64_WRAPPER
|
||||
w64wrapper f;
|
||||
#else
|
||||
word64 f;
|
||||
#endif
|
||||
word32 mask;
|
||||
|
||||
#endif
|
||||
@ -656,10 +719,31 @@ int wc_Poly1305Final(Poly1305* ctx, byte* mac)
|
||||
h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
|
||||
|
||||
/* mac = (h + pad) % (2^128) */
|
||||
#ifdef WOLFSSL_W64_WRAPPER
|
||||
w64SetLow32(&f, h0);
|
||||
f = w64Add32(f, ctx->pad[0], NULL);
|
||||
h0 = w64GetLow32(f);
|
||||
|
||||
f = w64ShiftRight(f, 32);
|
||||
f = w64Add32(f, h1, NULL);
|
||||
f = w64Add32(f, ctx->pad[1], NULL);
|
||||
h1 = w64GetLow32(f);
|
||||
|
||||
f = w64ShiftRight(f, 32);
|
||||
f = w64Add32(f, h2, NULL);
|
||||
f = w64Add32(f, ctx->pad[2], NULL);
|
||||
h2 = w64GetLow32(f);
|
||||
|
||||
f = w64ShiftRight(f, 32);
|
||||
f = w64Add32(f, h3, NULL);
|
||||
f = w64Add32(f, ctx->pad[3], NULL);
|
||||
h3 = w64GetLow32(f);
|
||||
#else
|
||||
f = (word64)h0 + ctx->pad[0] ; h0 = (word32)f;
|
||||
f = (word64)h1 + ctx->pad[1] + (f >> 32); h1 = (word32)f;
|
||||
f = (word64)h2 + ctx->pad[2] + (f >> 32); h2 = (word32)f;
|
||||
f = (word64)h3 + ctx->pad[3] + (f >> 32); h3 = (word32)f;
|
||||
#endif
|
||||
|
||||
U32TO8(mac + 0, h0);
|
||||
U32TO8(mac + 4, h1);
|
||||
|
@ -145,6 +145,7 @@ WOLFSSL_LOCAL word32 w64GetLow32(w64wrapper n);
|
||||
WOLFSSL_LOCAL word32 w64GetHigh32(w64wrapper n);
|
||||
WOLFSSL_LOCAL void w64SetLow32(w64wrapper *n, word32 low);
|
||||
WOLFSSL_LOCAL w64wrapper w64Add32(w64wrapper a, word32 b, byte *wrap);
|
||||
WOLFSSL_LOCAL w64wrapper w64Add(w64wrapper a, w64wrapper b, byte *wrap);
|
||||
WOLFSSL_LOCAL w64wrapper w64Sub32(w64wrapper a, word32 b, byte *wrap);
|
||||
WOLFSSL_LOCAL byte w64GT(w64wrapper a, w64wrapper b);
|
||||
WOLFSSL_LOCAL byte w64IsZero(w64wrapper a);
|
||||
@ -157,6 +158,7 @@ WOLFSSL_LOCAL w64wrapper w64Sub(w64wrapper a, w64wrapper b);
|
||||
WOLFSSL_LOCAL void w64Zero(w64wrapper *a);
|
||||
WOLFSSL_LOCAL w64wrapper w64ShiftRight(w64wrapper a, int shift);
|
||||
WOLFSSL_LOCAL w64wrapper w64ShiftLeft(w64wrapper a, int shift);
|
||||
WOLFSSL_LOCAL w64wrapper w64Mul(word32 a, word32 b);
|
||||
|
||||
#else /* !NO_INLINE */
|
||||
|
||||
|
@ -57,7 +57,7 @@
|
||||
|
||||
#if defined(USE_INTEL_POLY1305_SPEEDUP)
|
||||
#elif (defined(WC_HAS_SIZEOF_INT128_64BIT) || defined(WC_HAS_MSVC_64BIT) || \
|
||||
defined(WC_HAS_GCC_4_4_64BIT))
|
||||
defined(WC_HAS_GCC_4_4_64BIT)) && !defined(WOLFSSL_W64_WRAPPER_TEST)
|
||||
#define POLY130564
|
||||
#else
|
||||
#define POLY130532
|
||||
|
Reference in New Issue
Block a user