forked from wolfSSL/wolfssl
Added faster SHA256_MANY_REGISTERS
support (thanks Sean). The WOLFSSL_SHA256_BY_SPEC
option restores the old math; the new case is equivalent math, but easier for the compiler to optimize.
This commit is contained in:
@ -582,8 +582,13 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
0x90BEFFFAL, 0xA4506CEBL, 0xBEF9A3F7L, 0xC67178F2L
|
||||
};
|
||||
|
||||
/*
 * SHA-256 bitwise selection functions.
 *
 *   Ch(x,y,z)  - "choose": where a bit of x is set the result takes the
 *                matching bit of y, otherwise the matching bit of z.
 *   Maj(x,y,z) - "majority": each result bit is the value held by at least
 *                two of the three input bits.
 *
 * Both branches yield identical values for every input; only the shape of
 * the expression differs. Arguments are evaluated more than once, so they
 * must be free of side effects.
 */
#ifdef WOLFSSL_SHA256_BY_SPEC
    /* Earlier formulation, kept available as an opt-in build setting. */
    #define Ch(x,y,z)       ((z) ^ ((x) & ((y) ^ (z))))
    #define Maj(x,y,z)      ((((x) | (y)) & (z)) | ((x) & (y)))
#else
    /* Default formulation: same truth tables, expressed with XOR/AND only,
     * which is intended to be easier for the compiler to optimize. */
    #define Ch(x,y,z)       ((((y) ^ (z)) & (x)) ^ (z))
    #define Maj(x,y,z)      ((((x) ^ (y)) & ((y) ^ (z))) ^ (y))
#endif
|
||||
/* Logical right shift: masks x to its low 32 bits, then shifts right by n. */
#define R(x, n)         (((x) & 0xFFFFFFFFU) >> (n))

/* Forwards to rotrFixed(x, n), which is defined outside this view.
 * NOTE(review): presumably a 32-bit rotate-right - confirm against its
 * definition. */
#define S(x, n)         rotrFixed(x, n)
|
||||
@ -601,6 +606,7 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
/*
 * Round-relative views into the 8-word state array S (which must be in scope
 * at the point of use). For round offset i, g refers to slot (6 - i) mod 8
 * and h to slot (7 - i) mod 8, so the slot used for each name shifts by one
 * per round.
 *
 * The argument is parenthesized so that an expression argument such as
 * g(n + 1) subtracts the whole expression instead of only its first term.
 */
#define g(i) S[(6 - (i)) & 7]
#define h(i) S[(7 - (i)) & 7]
|
||||
|
||||
#ifndef SHA256_MANY_REGISTERS
|
||||
#define RND(j) \
|
||||
t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \
|
||||
t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
|
||||
@ -672,6 +678,73 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
/*
 * Message-schedule helpers. Both expect `W` (a 16-word array) and `sha256`
 * (pointer to the hash context) to be in scope where they are expanded, and
 * both evaluate to the schedule word they just stored.
 */

/* Copies word j of sha256->buffer into W[j] and yields that value. */
#define SCHED1(j) (W[(j)] = sha256->buffer[(j)])

/* Updates slot (j mod 16) of W in place by adding Gamma1 of the word 2 back,
 * the word 7 back, and Gamma0 of the word 15 back (all indices mod 16), so
 * only 16 schedule words are ever stored. Gamma0/Gamma1 are defined outside
 * this view. */
#define SCHED(j) (                          \
               W[ (j)        & 15] +=       \
        Gamma1(W[((j) -  2)  & 15]) +       \
               W[((j) -  7)  & 15]  +       \
        Gamma0(W[((j) - 15)  & 15])         \
    )
|
||||
|
||||
/*
 * One compression round for round offset j. Expects `t0`, `t1`, `i`, `K`,
 * the state accessors a()..h(), and the names used by SCHED1/SCHED to be in
 * scope at the point of use.
 *
 *   t0 = h + Sigma1(e) + Ch(e,f,g) + K[i+j] + schedule word
 *   t1 = Sigma0(a) + Maj(a,b,c)
 *   d += t0;  h = t0 + t1
 *
 * RND1 takes its schedule word from SCHED1 (load from the block buffer);
 * RNDN takes it from SCHED (derived from earlier schedule words).
 *
 * Each macro is wrapped in do { } while (0) so that it behaves as a single
 * statement, e.g. under an unbraced `if`. Invoke as `RND1(n);`.
 */
#define RND1(j) do {                                                          \
        t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i + (j)] +        \
             SCHED1(j);                                                       \
        t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j));                            \
        d(j) += t0;                                                           \
        h(j)  = t0 + t1;                                                      \
    } while (0)

#define RNDN(j) do {                                                          \
        t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i + (j)] +        \
             SCHED(j);                                                        \
        t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j));                            \
        d(j) += t0;                                                           \
        h(j)  = t0 + t1;                                                      \
    } while (0)
|
||||
|
||||
/*
 * Default transform hooks. If XTRANSFORM is already defined before this
 * point, neither macro is redefined here.
 * NOTE(review): Transform_Sha256_Len is not defined in the visible part of
 * this branch - confirm it exists wherever XTRANSFORM_LEN is expanded.
 */
#ifndef XTRANSFORM
    #define XTRANSFORM(S)            Transform_Sha256((S))
    #define XTRANSFORM_LEN(S, D, L)  Transform_Sha256_Len((S),(D),(L))
#endif
|
||||
|
||||
/*
 * Runs the 64 SHA-256 compression rounds over the 16 words currently held in
 * sha256->buffer and adds the result into sha256->digest.
 *
 * sha256  hash context; digest[0..7] is read and updated, buffer[0..15] is
 *         read (through SCHED1 inside RND1).
 * returns 0 in all cases.
 *
 * The local names S, W, t0, t1 and i are referenced by name from the RND1 /
 * RNDN macros and the state accessor macros, so they must not be renamed.
 */
static int Transform_Sha256(wc_Sha256* sha256)
{
    word32 S[8], t0, t1;
    int i;
    word32 W[16];   /* 16-word circular message schedule */

    /* Copy digest to working vars */
    S[0] = sha256->digest[0];
    S[1] = sha256->digest[1];
    S[2] = sha256->digest[2];
    S[3] = sha256->digest[3];
    S[4] = sha256->digest[4];
    S[5] = sha256->digest[5];
    S[6] = sha256->digest[6];
    S[7] = sha256->digest[7];

    /* Rounds 0-15: schedule words are loaded straight from the block buffer.
     * i is the base added to the round offset when indexing K. */
    i = 0;
    RND1( 0); RND1( 1); RND1( 2); RND1( 3);
    RND1( 4); RND1( 5); RND1( 6); RND1( 7);
    RND1( 8); RND1( 9); RND1(10); RND1(11);
    RND1(12); RND1(13); RND1(14); RND1(15);
    /* Rounds 16-63 in three passes of 16: schedule words are derived in
     * place from the previous 16 (partially unrolled loop). */
    for (i = 16; i < 64; i += 16) {
        RNDN( 0); RNDN( 1); RNDN( 2); RNDN( 3);
        RNDN( 4); RNDN( 5); RNDN( 6); RNDN( 7);
        RNDN( 8); RNDN( 9); RNDN(10); RNDN(11);
        RNDN(12); RNDN(13); RNDN(14); RNDN(15);
    }

    /* Add the working vars back into digest */
    sha256->digest[0] += S[0];
    sha256->digest[1] += S[1];
    sha256->digest[2] += S[2];
    sha256->digest[3] += S[3];
    sha256->digest[4] += S[4];
    sha256->digest[5] += S[5];
    sha256->digest[6] += S[6];
    sha256->digest[7] += S[7];

    return 0;
}
|
||||
#endif
|
||||
#endif
|
||||
/* End wc_ software implementation */
|
||||
|
||||
|
Reference in New Issue
Block a user