Merge pull request #1217 from dgarske/sha_slow

New `--enable-lowresource` option and SHA256 not unrolled support
This commit is contained in:
toddouska
2017-11-13 16:02:01 -08:00
committed by GitHub
7 changed files with 139 additions and 80 deletions

View File

@@ -418,7 +418,7 @@ static INLINE void AddLength(wc_Sha* sha, word32 len)
t = e; e = d; d = c; c = b; b = a; a = t;
}
#else
/* nearly 1 K bigger in code size but 25% faster */
/* nearly 1 K bigger in code size but 25% faster */
/* 4 rounds of 20 operations each. Loop unrolled. */
R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3);
R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7);

View File

@@ -538,16 +538,25 @@ static int InitSha256(wc_Sha256* sha256)
#define R(x, n) (((x) & 0xFFFFFFFFU) >> (n))
#define S(x, n) rotrFixed(x, n)
#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10))
#define RND(a,b,c,d,e,f,g,h,i) \
t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \
t1 = Sigma0((a)) + Maj((a), (b), (c)); \
(d) += t0; \
(h) = t0 + t1;
#define a(i) S[(0-i) & 7]
#define b(i) S[(1-i) & 7]
#define c(i) S[(2-i) & 7]
#define d(i) S[(3-i) & 7]
#define e(i) S[(4-i) & 7]
#define f(i) S[(5-i) & 7]
#define g(i) S[(6-i) & 7]
#define h(i) S[(7-i) & 7]
#define RND(j) \
t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \
t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \
d(j) += t0; \
h(j) = t0 + t1
#ifndef XTRANSFORM
#define XTRANSFORM(S, B) Transform((S))
@@ -579,16 +588,21 @@ static int InitSha256(wc_Sha256* sha256)
for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++)
W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16];
#ifdef USE_SLOW_SHA256
/* not unrolled - ~2k smaller and ~25% slower */
for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0);
RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1);
RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2);
RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3);
RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4);
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5);
RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6);
RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7);
int j;
for (j = 0; j < 8; j++) { /* braces needed here for macros {} */
RND(j);
}
}
#else
/* partially loop unrolled */
for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) {
RND(0); RND(1); RND(2); RND(3);
RND(4); RND(5); RND(6); RND(7);
}
#endif /* USE_SLOW_SHA256 */
/* Add the working vars back into digest state[] */
for (i = 0; i < 8; i++) {

View File

@@ -31,6 +31,11 @@
#include <wolfssl/wolfcrypt/error-crypt.h>
#include <wolfssl/wolfcrypt/cpuid.h>
/* deprecated USE_SLOW_SHA2 (replaced with USE_SLOW_SHA512) */
#if defined(USE_SLOW_SHA2) && !defined(USE_SLOW_SHA512)
#define USE_SLOW_SHA512
#endif
/* fips wrapper calls, user can call direct */
#ifdef HAVE_FIPS
int wc_InitSha512(wc_Sha512* sha)
@@ -401,40 +406,43 @@ static const word64 K512[80] = {
W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817)
};
#define blk0(i) (W[i] = sha512->buffer[i])
#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15]))
#define blk2(i) (\
W[ i & 15] += \
s1(W[(i-2) & 15])+ \
W[(i-7) & 15] + \
s0(W[(i-15) & 15]) \
)
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
#define Ch(x,y,z) (z ^ (x & (y ^ z)))
#define Maj(x,y,z) ((x & y) | (z & (x | y)))
#define a(i) T[(0-i)&7]
#define b(i) T[(1-i)&7]
#define c(i) T[(2-i)&7]
#define d(i) T[(3-i)&7]
#define e(i) T[(4-i)&7]
#define f(i) T[(5-i)&7]
#define g(i) T[(6-i)&7]
#define h(i) T[(7-i)&7]
#define a(i) T[(0-i) & 7]
#define b(i) T[(1-i) & 7]
#define c(i) T[(2-i) & 7]
#define d(i) T[(3-i) & 7]
#define e(i) T[(4-i) & 7]
#define f(i) T[(5-i) & 7]
#define g(i) T[(6-i) & 7]
#define h(i) T[(7-i) & 7]
#define S0(x) (rotrFixed64(x,28)^rotrFixed64(x,34)^rotrFixed64(x,39))
#define S1(x) (rotrFixed64(x,14)^rotrFixed64(x,18)^rotrFixed64(x,41))
#define s0(x) (rotrFixed64(x,1)^rotrFixed64(x,8)^(x>>7))
#define s1(x) (rotrFixed64(x,19)^rotrFixed64(x,61)^(x>>6))
#define S0(x) (rotrFixed64(x,28) ^ rotrFixed64(x,34) ^ rotrFixed64(x,39))
#define S1(x) (rotrFixed64(x,14) ^ rotrFixed64(x,18) ^ rotrFixed64(x,41))
#define s0(x) (rotrFixed64(x,1) ^ rotrFixed64(x,8) ^ (x>>7))
#define s1(x) (rotrFixed64(x,19) ^ rotrFixed64(x,61) ^ (x>>6))
#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\
d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i))
#define R(i) \
h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j ? blk2(i) : blk0(i)); \
d(i) += h(i); \
h(i) += S0(a(i)) + Maj(a(i),b(i),c(i))
static int _Transform(wc_Sha512* sha512)
{
const word64* K = K512;
word32 j;
word64 T[8];
#ifdef WOLFSSL_SMALL_STACK
word64* W;
W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER);
@@ -447,7 +455,7 @@ static int _Transform(wc_Sha512* sha512)
/* Copy digest to working vars */
XMEMCPY(T, sha512->digest, sizeof(T));
#ifdef USE_SLOW_SHA2
#ifdef USE_SLOW_SHA512
/* over twice as small, but 50% slower */
/* 80 operations, not unrolled */
for (j = 0; j < 80; j += 16) {
@@ -464,10 +472,9 @@ static int _Transform(wc_Sha512* sha512)
R( 8); R( 9); R(10); R(11);
R(12); R(13); R(14); R(15);
}
#endif /* USE_SLOW_SHA2 */
#endif /* USE_SLOW_SHA512 */
/* Add the working vars back into digest */
sha512->digest[0] += a(0);
sha512->digest[1] += b(0);
sha512->digest[2] += c(0);