mirror of
https://github.com/wolfSSL/wolfssl.git
synced 2026-02-04 04:55:04 +01:00
Merge pull request #1217 from dgarske/sha_slow
New `--enable-lowresource` option and SHA256 not unrolled support
This commit is contained in:
@@ -418,7 +418,7 @@ static INLINE void AddLength(wc_Sha* sha, word32 len)
|
||||
t = e; e = d; d = c; c = b; b = a; a = t;
|
||||
}
|
||||
#else
|
||||
/* nearly 1 K bigger in code size but 25% faster */
|
||||
/* nearly 1 K bigger in code size but 25% faster */
|
||||
/* 4 rounds of 20 operations each. Loop unrolled. */
|
||||
R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
|
||||
R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);
|
||||
|
||||
@@ -538,16 +538,25 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
#define R(x, n) (((x) & 0xFFFFFFFFU) >> (n))
|
||||
|
||||
#define S(x, n) rotrFixed(x, n)
|
||||
#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
|
||||
#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
|
||||
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
|
||||
#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
|
||||
#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
|
||||
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
|
||||
#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
|
||||
|
||||
#define RND(a,b,c,d,e,f,g,h,i) \
|
||||
t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
|
||||
t1 = Sigma0((a)) + Maj((a), (b), (c)); \
|
||||
(d) += t0; \
|
||||
(h) = t0 + t1;
|
||||
/*
 * Rotating names for the eight working variables held in S[]. Name k
 * (a = 0 ... h = 7) maps to slot (k - i) & 7, so raising i by one moves every
 * name to the previous slot (mod 8); e.g. a(1) is S[7] and b(1) is S[0].
 * Consecutive rounds can therefore rename the variables without copying them.
 */
#define a(i) S[(0-i) & 7]
|
||||
#define b(i) S[(1-i) & 7]
|
||||
#define c(i) S[(2-i) & 7]
|
||||
#define d(i) S[(3-i) & 7]
|
||||
#define e(i) S[(4-i) & 7]
|
||||
#define f(i) S[(5-i) & 7]
|
||||
#define g(i) S[(6-i) & 7]
|
||||
#define h(i) S[(7-i) & 7]
|
||||
|
||||
/*
 * One SHA-256 compression round for step i+j, using K[i+j] and W[i+j].
 * Expands to several statements (the last one has no trailing semicolon), so
 * it must be used inside a braced block. Relies on S, K, W, i, t0 and t1
 * being in scope at the point of expansion; j is not parenthesized, so pass
 * a plain identifier or constant.
 */
#define RND(j) \
|
||||
t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \
|
||||
t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
|
||||
d(j) += t0; \
|
||||
h(j) = t0 + t1
|
||||
|
||||
#ifndef XTRANSFORM
|
||||
#define XTRANSFORM(S, B) Transform((S))
|
||||
@@ -579,16 +588,21 @@ static int InitSha256(wc_Sha256* sha256)
|
||||
for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++)
|
||||
W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
|
||||
|
||||
#ifdef USE_SLOW_SHA256
|
||||
/* not unrolled - ~2k smaller and ~25% slower */
|
||||
for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
|
||||
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
|
||||
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
|
||||
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
|
||||
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
|
||||
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
|
||||
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
|
||||
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
|
||||
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
|
||||
int j;
|
||||
for (j = 0; j < 8; j++) { /* braces needed here for macros {} */
|
||||
RND(j);
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* partially loop unrolled */
|
||||
for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
|
||||
RND(0); RND(1); RND(2); RND(3);
|
||||
RND(4); RND(5); RND(6); RND(7);
|
||||
}
|
||||
#endif /* USE_SLOW_SHA256 */
|
||||
|
||||
/* Add the working vars back into digest state[] */
|
||||
for (i = 0; i < 8; i++) {
|
||||
|
||||
@@ -31,6 +31,11 @@
|
||||
#include <wolfssl/wolfcrypt/error-crypt.h>
|
||||
#include <wolfssl/wolfcrypt/cpuid.h>
|
||||
|
||||
/* deprecated USE_SLOW_SHA2 (replaced with USE_SLOW_SHA512) */
|
||||
#if defined(USE_SLOW_SHA2) && !defined(USE_SLOW_SHA512)
|
||||
#define USE_SLOW_SHA512
|
||||
#endif
|
||||
|
||||
/* fips wrapper calls, user can call direct */
|
||||
#ifdef HAVE_FIPS
|
||||
int wc_InitSha512(wc_Sha512* sha)
|
||||
@@ -401,40 +406,43 @@ static const word64 K512[80] = {
|
||||
W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
|
||||
};
|
||||
|
||||
|
||||
|
||||
#define blk0(i) (W[i] = sha512->buffer[i])
|
||||
|
||||
#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
|
||||
/*
 * Message-schedule update on W[] used as a 16-entry circular buffer (all
 * indices are masked with 15). Adds s1 of the entry 2 steps back, the entry
 * 7 steps back and s0 of the entry 15 steps back into slot i & 15, which
 * still holds the value from 16 steps back. The whole macro is a single
 * parenthesized expression whose value is the updated W entry.
 */
#define blk2(i) (\
|
||||
W[ i & 15] += \
|
||||
s1(W[(i-2) & 15])+ \
|
||||
W[(i-7) & 15] + \
|
||||
s0(W[(i-15) & 15]) \
|
||||
)
|
||||
|
||||
#define Ch(x,y,z) (z^(x&(y^z)))
|
||||
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
|
||||
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
|
||||
#define Maj(x,y,z) ((x & y) | (z & (x | y)))
|
||||
|
||||
#define a(i) T[(0-i)&7]
|
||||
#define b(i) T[(1-i)&7]
|
||||
#define c(i) T[(2-i)&7]
|
||||
#define d(i) T[(3-i)&7]
|
||||
#define e(i) T[(4-i)&7]
|
||||
#define f(i) T[(5-i)&7]
|
||||
#define g(i) T[(6-i)&7]
|
||||
#define h(i) T[(7-i)&7]
|
||||
#define a(i) T[(0-i) & 7]
|
||||
#define b(i) T[(1-i) & 7]
|
||||
#define c(i) T[(2-i) & 7]
|
||||
#define d(i) T[(3-i) & 7]
|
||||
#define e(i) T[(4-i) & 7]
|
||||
#define f(i) T[(5-i) & 7]
|
||||
#define g(i) T[(6-i) & 7]
|
||||
#define h(i) T[(7-i) & 7]
|
||||
|
||||
#define S0(x) (rotrFixed64(x,28)^rotrFixed64(x,34)^rotrFixed64(x,39))
|
||||
#define S1(x) (rotrFixed64(x,14)^rotrFixed64(x,18)^rotrFixed64(x,41))
|
||||
#define s0(x) (rotrFixed64(x,1)^rotrFixed64(x,8)^(x>>7))
|
||||
#define s1(x) (rotrFixed64(x,19)^rotrFixed64(x,61)^(x>>6))
|
||||
#define S0(x) (rotrFixed64(x,28) ^ rotrFixed64(x,34) ^ rotrFixed64(x,39))
|
||||
#define S1(x) (rotrFixed64(x,14) ^ rotrFixed64(x,18) ^ rotrFixed64(x,41))
|
||||
#define s0(x) (rotrFixed64(x,1) ^ rotrFixed64(x,8) ^ (x>>7))
|
||||
#define s1(x) (rotrFixed64(x,19) ^ rotrFixed64(x,61) ^ (x>>6))
|
||||
|
||||
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
|
||||
d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
|
||||
/*
 * One SHA-512 compression round for step i+j. When j is 0 the schedule word
 * is loaded from sha512->buffer via blk0(i); otherwise it is produced by
 * blk2(i). Expands to three statements (the last one has no trailing
 * semicolon) and relies on T (through a()..h()), K, W, j and sha512 being in
 * scope at the point of expansion.
 */
#define R(i) \
|
||||
h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j ? blk2(i) : blk0(i)); \
|
||||
d(i) += h(i); \
|
||||
h(i) += S0(a(i)) + Maj(a(i),b(i),c(i))
|
||||
|
||||
static int _Transform(wc_Sha512* sha512)
|
||||
{
|
||||
const word64* K = K512;
|
||||
|
||||
word32 j;
|
||||
word64 T[8];
|
||||
|
||||
|
||||
#ifdef WOLFSSL_SMALL_STACK
|
||||
word64* W;
|
||||
W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
|
||||
@@ -447,7 +455,7 @@ static int _Transform(wc_Sha512* sha512)
|
||||
/* Copy digest to working vars */
|
||||
XMEMCPY(T, sha512->digest, sizeof(T));
|
||||
|
||||
#ifdef USE_SLOW_SHA2
|
||||
#ifdef USE_SLOW_SHA512
|
||||
/* over twice as small, but 50% slower */
|
||||
/* 80 operations, not unrolled */
|
||||
for (j = 0; j < 80; j += 16) {
|
||||
@@ -464,10 +472,9 @@ static int _Transform(wc_Sha512* sha512)
|
||||
R( 8); R( 9); R(10); R(11);
|
||||
R(12); R(13); R(14); R(15);
|
||||
}
|
||||
#endif /* USE_SLOW_SHA2 */
|
||||
#endif /* USE_SLOW_SHA512 */
|
||||
|
||||
/* Add the working vars back into digest */
|
||||
|
||||
sha512->digest[0] += a(0);
|
||||
sha512->digest[1] += b(0);
|
||||
sha512->digest[2] += c(0);
|
||||
|
||||
Reference in New Issue
Block a user