From 5a5fea7b46cf1a81dce89018a58ab3fc01b7b666 Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 9 Nov 2017 11:05:28 -0800 Subject: [PATCH] Add `USE_SLOW_SHA256` and `USE_SLOW_SHA512` options for reduced code size of SHA. Existing `USE_SLOW_SHA2` applies for SHA512 only. Cleanup formatting of the sha256.c and sha512.c code. Added new `./configure --enable-lowresource` option, which defines the memory reduction defines. Fix for `make check` resume.test script with `NO_SESSION_CACHE` defined. --- IDE/GCC-ARM/README.md | 3 +- configure.ac | 96 ++++++++++++++++++++++++---------------- examples/client/client.c | 2 + scripts/resume.test | 15 ++++++- wolfcrypt/src/sha.c | 2 +- wolfcrypt/src/sha256.c | 46 ++++++++++++------- wolfcrypt/src/sha512.c | 55 +++++++++++++---------- 7 files changed, 139 insertions(+), 80 deletions(-) diff --git a/IDE/GCC-ARM/README.md b/IDE/GCC-ARM/README.md index f14c11fdc..f865bf99b 100644 --- a/IDE/GCC-ARM/README.md +++ b/IDE/GCC-ARM/README.md @@ -60,6 +60,7 @@ These settings are located in `Header/user_settings.h`. AES GCM: `GCM_SMALL`, `GCM_WORD32` or `GCM_TABLE`: Tunes performance and flash/memory usage. * `CURVED25519_SMALL`: Enables small versions of Ed/Curve (FE/GE math). * `USE_SLOW_SHA`: Enables smaller/slower version of SHA. -* `USE_SLOW_SHA2`: Over twice as small, but 50% slower +* `USE_SLOW_SHA256`: About 2k smaller and about 25% slower +* `USE_SLOW_SHA512`: Over twice as small, but 50% slower * `USE_CERT_BUFFERS_1024` or `USE_CERT_BUFFERS_2048`: Size of RSA certs / keys to test with. * `BENCH_EMBEDDED`: Define this if using the wolfCrypt test/benchmark and using a low memory target. 
diff --git a/configure.ac b/configure.ac index 0961c0743..5ba9dfe23 100644 --- a/configure.ac +++ b/configure.ac @@ -558,9 +558,10 @@ AC_ARG_ENABLE([leanpsk], if test "$ENABLED_LEANPSK" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_LEANPSK -DWOLFSSL_STATIC_PSK -DHAVE_NULL_CIPHER -DSINGLE_THREADED -DNO_AES -DNO_FILESYSTEM -DNO_RABBIT -DNO_RSA -DNO_DSA -DNO_DH -DNO_CERTS -DNO_PWDBASED -DNO_MD4 -DNO_MD5 -DNO_ERROR_STRINGS -DNO_OLD_TLS -DNO_RC4 -DNO_WRITEV -DNO_SESSION_CACHE -DNO_DEV_RANDOM -DWOLFSSL_USER_IO -DNO_SHA -DUSE_SLOW_SHA" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_LEANPSK -DWOLFSSL_STATIC_PSK -DHAVE_NULL_CIPHER -DSINGLE_THREADED -DNO_AES -DNO_FILESYSTEM -DNO_RABBIT -DNO_RSA -DNO_DSA -DNO_DH -DNO_CERTS -DNO_PWDBASED -DNO_MD4 -DNO_MD5 -DNO_ERROR_STRINGS -DNO_OLD_TLS -DNO_RC4 -DNO_WRITEV -DNO_DEV_RANDOM -DWOLFSSL_USER_IO -DNO_SHA" ENABLED_SLOWMATH="no" ENABLED_SINGLETHREADED="yes" + enable_lowresource=yes fi AM_CONDITIONAL([BUILD_LEANPSK], [test "x$ENABLED_LEANPSK" = "xyes"]) @@ -575,12 +576,32 @@ AC_ARG_ENABLE([leantls], if test "$ENABLED_LEANTLS" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_LEANTLS -DNO_WRITEV -DHAVE_ECC -DTFM_ECC256 -DECC_USER_CURVES -DNO_WOLFSSL_SERVER -DNO_RABBIT -DNO_RSA -DNO_DSA -DNO_DH -DNO_PWDBASED -DNO_MD5 -DNO_ERROR_STRINGS -DNO_OLD_TLS -DNO_RC4 -DNO_SESSION_CACHE -DNO_SHA -DUSE_SLOW_SHA -DUSE_SLOW_SHA2 -DNO_PSK -DNO_WOLFSSL_MEMORY" + AM_CFLAGS="$AM_CFLAGS -DWOLFSSL_LEANTLS -DNO_WRITEV -DHAVE_ECC -DTFM_ECC256 -DECC_USER_CURVES -DNO_WOLFSSL_SERVER -DNO_RABBIT -DNO_RSA -DNO_DSA -DNO_DH -DNO_PWDBASED -DNO_MD5 -DNO_ERROR_STRINGS -DNO_OLD_TLS -DNO_RC4 -DNO_SHA -DNO_PSK -DNO_WOLFSSL_MEMORY" + enable_lowresource=yes fi AM_CONDITIONAL([BUILD_LEANTLS], [test "x$ENABLED_LEANTLS" = "xyes"]) +# low resource options to reduce flash and memory use +AC_ARG_ENABLE([lowresource], + [AS_HELP_STRING([--enable-lowresource],[Enable low resource options for memory/flash (default: disabled)])], + [ ENABLED_LOWRESOURCE=$enableval ], + [ ENABLED_LOWRESOURCE=no ] + 
) + +if test "$ENABLED_LOWRESOURCE" = "yes" +then + # low memory / flash flags + AM_CFLAGS="$AM_CFLAGS -DNO_SESSION_CACHE -DRSA_LOW_MEM -DGCM_SMALL -DCURVE25519_SMALL -DED25519_SMALL" + + # low flash flags + AM_CFLAGS="$AM_CFLAGS -DUSE_SLOW_SHA -DUSE_SLOW_SHA256 -DUSE_SLOW_SHA512" +fi + +AM_CONDITIONAL([BUILD_LOWMEM], [test "x$ENABLED_LOWRESOURCE" = "xyes"]) + + # big cache AC_ARG_ENABLE([bigcache], [AS_HELP_STRING([--enable-bigcache],[Enable big session cache (default: disabled)])], @@ -713,6 +734,7 @@ AS_IF([ test "x$ENABLED_SNIFFER" = "xyes" ], AM_CONDITIONAL([BUILD_SNIFFER], [ test "x$ENABLED_SNIFFER" = "xyes" ]) AM_CONDITIONAL([BUILD_SNIFFTEST], [ test "x$ENABLED_SNIFFTEST" = "xyes" ]) + # AES-GCM AC_ARG_ENABLE([aesgcm], [AS_HELP_STRING([--enable-aesgcm],[Enable wolfSSL AES-GCM support (default: enabled)])], @@ -727,26 +749,26 @@ then ENABLED_AESGCM=no fi -if test "$ENABLED_AESGCM" = "word32" +if test "$ENABLED_AESGCM" != "no" then - AM_CFLAGS="$AM_CFLAGS -DGCM_WORD32" - ENABLED_AESGCM=yes -fi + if test "$ENABLED_AESGCM" = "word32" + then + AM_CFLAGS="$AM_CFLAGS -DGCM_WORD32" + ENABLED_AESGCM=yes + fi -if test "$ENABLED_AESGCM" = "small" -then - AM_CFLAGS="$AM_CFLAGS -DGCM_SMALL" - ENABLED_AESGCM=yes -fi + if test "$ENABLED_AESGCM" = "small" || test "$ENABLED_LOWRESOURCE" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DGCM_SMALL" + ENABLED_AESGCM=yes + fi -if test "$ENABLED_AESGCM" = "table" -then - AM_CFLAGS="$AM_CFLAGS -DGCM_TABLE" - ENABLED_AESGCM=yes -fi + if test "$ENABLED_AESGCM" = "table" + then + AM_CFLAGS="$AM_CFLAGS -DGCM_TABLE" + ENABLED_AESGCM=yes + fi -if test "$ENABLED_AESGCM" = "yes" -then AM_CFLAGS="$AM_CFLAGS -DHAVE_AESGCM" fi @@ -1246,21 +1268,21 @@ then ENABLED_CURVE25519="yes" fi -if test "$ENABLED_CURVE25519" = "small" +if test "$ENABLED_CURVE25519" != "no" then - AM_CFLAGS="$AM_CFLAGS -DCURVE25519_SMALL" - ENABLED_CURVE25519_SMALL=yes - ENABLED_CURVE25519=yes -fi + if test "$ENABLED_CURVE25519" = "small" || test "$ENABLED_LOWRESOURCE" = "yes" + 
then + AM_CFLAGS="$AM_CFLAGS -DCURVE25519_SMALL" + ENABLED_CURVE25519_SMALL=yes + ENABLED_CURVE25519=yes + fi -if test "$ENABLED_CURVE25519" = "no128bit" -then - AM_CFLAGS="$AM_CFLAGS -DNO_CURVED25519_128BIT" - ENABLED_CURVE25519=yes -fi + if test "$ENABLED_CURVE25519" = "no128bit" + then + AM_CFLAGS="$AM_CFLAGS -DNO_CURVED25519_128BIT" + ENABLED_CURVE25519=yes + fi -if test "$ENABLED_CURVE25519" = "yes" -then AM_CFLAGS="$AM_CFLAGS -DHAVE_CURVE25519" ENABLED_FEMATH=yes fi @@ -1282,15 +1304,15 @@ then ENABLED_ED25519="yes" fi -if test "$ENABLED_ED25519" = "small" +if test "$ENABLED_ED25519" != "no" && test "$ENABLED_32BIT" = "no" then - AM_CFLAGS="$AM_CFLAGS -DED25519_SMALL" - ENABLED_ED25519_SMALL=yes - ENABLED_ED25519=yes -fi + if test "$ENABLED_ED25519" = "small" || test "$ENABLED_LOWRESOURCE" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DED25519_SMALL" + ENABLED_ED25519_SMALL=yes + ENABLED_ED25519=yes + fi -if test "$ENABLED_ED25519" = "yes" && test "$ENABLED_32BIT" = "no" -then if test "$ENABLED_SHA512" = "no" then AC_MSG_ERROR([cannot enable ed25519 without enabling sha512.]) diff --git a/examples/client/client.c b/examples/client/client.c index e6a32d45a..26004b5de 100644 --- a/examples/client/client.c +++ b/examples/client/client.c @@ -684,7 +684,9 @@ static void Usage(void) #endif printf("-m Match domain name in cert\n"); printf("-N Use Non-blocking sockets\n"); +#ifndef NO_SESSION_CACHE printf("-r Resume session\n"); +#endif printf("-w Wait for bidirectional shutdown\n"); printf("-M Use STARTTLS, using protocol (smtp)\n"); #ifdef HAVE_SECURE_RENEGOTIATION diff --git a/scripts/resume.test b/scripts/resume.test index 5badb4838..f948fb568 100755 --- a/scripts/resume.test +++ b/scripts/resume.test @@ -1,10 +1,11 @@ #!/bin/sh -#reusme.test +#resume.test # need a unique resume port since may run the same time as testsuite # use server port zero hack to get one resume_string="reused" +resume_sup_string="Resume session" ems_string="Extended\ Master\ Secret" 
resume_port=0 no_pid=-1 @@ -45,6 +46,18 @@ do_trap() { do_test() { echo -e "\nStarting example server for resume test...\n" + # Make sure we support session resumption (!NO_SESSION_CACHE). + # Check the client usage output for the "Resume session" option. If it is + # missing, report that resume is not supported with this build and skip the test. + options_check=`./examples/client/client -?` + case "$options_check" in + *$resume_sup_string*) + echo -e "\nResume test supported";; + *) + echo -e "\nResume test not supported with build" + return;; + esac + remove_ready_file ./examples/server/server -r -R $ready_file -p $resume_port & server_pid=$! diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index 8f7255f26..16a38b02c 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -418,7 +418,7 @@ static INLINE void AddLength(wc_Sha* sha, word32 len) t = e; e = d; d = c; c = b; b = a; a = t; } #else - /* nearly 1 K bigger in code size but 25% faster */ + /* nearly 1 K bigger in code size but 25% faster */ /* 4 rounds of 20 operations each. Loop unrolled. 
*/ R0(a,b,c,d,e, 0); R0(e,a,b,c,d, 1); R0(d,e,a,b,c, 2); R0(c,d,e,a,b, 3); R0(b,c,d,e,a, 4); R0(a,b,c,d,e, 5); R0(e,a,b,c,d, 6); R0(d,e,a,b,c, 7); diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 470914cf1..e8fac1a13 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -538,16 +538,25 @@ static int InitSha256(wc_Sha256* sha256) #define R(x, n) (((x) & 0xFFFFFFFFU) >> (n)) #define S(x, n) rotrFixed(x, n) - #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) - #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) - #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) + #define Sigma0(x) (S(x, 2) ^ S(x, 13) ^ S(x, 22)) + #define Sigma1(x) (S(x, 6) ^ S(x, 11) ^ S(x, 25)) + #define Gamma0(x) (S(x, 7) ^ S(x, 18) ^ R(x, 3)) #define Gamma1(x) (S(x, 17) ^ S(x, 19) ^ R(x, 10)) - #define RND(a,b,c,d,e,f,g,h,i) \ - t0 = (h) + Sigma1((e)) + Ch((e), (f), (g)) + K[(i)] + W[(i)]; \ - t1 = Sigma0((a)) + Maj((a), (b), (c)); \ - (d) += t0; \ - (h) = t0 + t1; + #define a(i) S[(0-i) & 7] + #define b(i) S[(1-i) & 7] + #define c(i) S[(2-i) & 7] + #define d(i) S[(3-i) & 7] + #define e(i) S[(4-i) & 7] + #define f(i) S[(5-i) & 7] + #define g(i) S[(6-i) & 7] + #define h(i) S[(7-i) & 7] + + #define RND(j) \ + t0 = h(j) + Sigma1(e(j)) + Ch(e(j), f(j), g(j)) + K[i+j] + W[i+j]; \ + t1 = Sigma0(a(j)) + Maj(a(j), b(j), c(j)); \ + d(j) += t0; \ + h(j) = t0 + t1 #ifndef XTRANSFORM #define XTRANSFORM(S, B) Transform((S)) @@ -579,16 +588,21 @@ static int InitSha256(wc_Sha256* sha256) for (i = 16; i < WC_SHA256_BLOCK_SIZE; i++) W[i] = Gamma1(W[i-2]) + W[i-7] + Gamma0(W[i-15]) + W[i-16]; + #ifdef USE_SLOW_SHA256 + /* not unrolled - ~2k smaller and ~25% slower */ for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) { - RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],i+0); - RND(S[7],S[0],S[1],S[2],S[3],S[4],S[5],S[6],i+1); - RND(S[6],S[7],S[0],S[1],S[2],S[3],S[4],S[5],i+2); - RND(S[5],S[6],S[7],S[0],S[1],S[2],S[3],S[4],i+3); - RND(S[4],S[5],S[6],S[7],S[0],S[1],S[2],S[3],i+4); - 
RND(S[3],S[4],S[5],S[6],S[7],S[0],S[1],S[2],i+5); - RND(S[2],S[3],S[4],S[5],S[6],S[7],S[0],S[1],i+6); - RND(S[1],S[2],S[3],S[4],S[5],S[6],S[7],S[0],i+7); + int j; + for (j = 0; j < 8; j++) { /* braces needed here for macros {} */ + RND(j); + } } + #else + /* partially loop unrolled */ + for (i = 0; i < WC_SHA256_BLOCK_SIZE; i += 8) { + RND(0); RND(1); RND(2); RND(3); + RND(4); RND(5); RND(6); RND(7); + } + #endif /* USE_SLOW_SHA256 */ /* Add the working vars back into digest state[] */ for (i = 0; i < 8; i++) { diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index 7bb1bf33f..bada40af7 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -31,6 +31,11 @@ #include #include +/* deprecated USE_SLOW_SHA2 (replaced with USE_SLOW_SHA512) */ +#if defined(USE_SLOW_SHA2) && !defined(USE_SLOW_SHA512) + #define USE_SLOW_SHA512 +#endif + /* fips wrapper calls, user can call direct */ #ifdef HAVE_FIPS int wc_InitSha512(wc_Sha512* sha) @@ -401,40 +406,43 @@ static const word64 K512[80] = { W64LIT(0x5fcb6fab3ad6faec), W64LIT(0x6c44198c4a475817) }; - - #define blk0(i) (W[i] = sha512->buffer[i]) -#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) +#define blk2(i) (\ + W[ i & 15] += \ + s1(W[(i-2) & 15])+ \ + W[(i-7) & 15] + \ + s0(W[(i-15) & 15]) \ + ) -#define Ch(x,y,z) (z^(x&(y^z))) -#define Maj(x,y,z) ((x&y)|(z&(x|y))) +#define Ch(x,y,z) (z ^ (x & (y ^ z))) +#define Maj(x,y,z) ((x & y) | (z & (x | y))) -#define a(i) T[(0-i)&7] -#define b(i) T[(1-i)&7] -#define c(i) T[(2-i)&7] -#define d(i) T[(3-i)&7] -#define e(i) T[(4-i)&7] -#define f(i) T[(5-i)&7] -#define g(i) T[(6-i)&7] -#define h(i) T[(7-i)&7] +#define a(i) T[(0-i) & 7] +#define b(i) T[(1-i) & 7] +#define c(i) T[(2-i) & 7] +#define d(i) T[(3-i) & 7] +#define e(i) T[(4-i) & 7] +#define f(i) T[(5-i) & 7] +#define g(i) T[(6-i) & 7] +#define h(i) T[(7-i) & 7] -#define S0(x) (rotrFixed64(x,28)^rotrFixed64(x,34)^rotrFixed64(x,39)) -#define S1(x) 
(rotrFixed64(x,14)^rotrFixed64(x,18)^rotrFixed64(x,41)) -#define s0(x) (rotrFixed64(x,1)^rotrFixed64(x,8)^(x>>7)) -#define s1(x) (rotrFixed64(x,19)^rotrFixed64(x,61)^(x>>6)) +#define S0(x) (rotrFixed64(x,28) ^ rotrFixed64(x,34) ^ rotrFixed64(x,39)) +#define S1(x) (rotrFixed64(x,14) ^ rotrFixed64(x,18) ^ rotrFixed64(x,41)) +#define s0(x) (rotrFixed64(x,1) ^ rotrFixed64(x,8) ^ (x>>7)) +#define s1(x) (rotrFixed64(x,19) ^ rotrFixed64(x,61) ^ (x>>6)) -#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+K[i+j]+(j?blk2(i):blk0(i));\ - d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) +#define R(i) \ + h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j ? blk2(i) : blk0(i)); \ + d(i) += h(i); \ + h(i) += S0(a(i)) + Maj(a(i),b(i),c(i)) static int _Transform(wc_Sha512* sha512) { const word64* K = K512; - word32 j; word64 T[8]; - #ifdef WOLFSSL_SMALL_STACK word64* W; W = (word64*) XMALLOC(sizeof(word64) * 16, NULL, DYNAMIC_TYPE_TMP_BUFFER); @@ -447,7 +455,7 @@ static int _Transform(wc_Sha512* sha512) /* Copy digest to working vars */ XMEMCPY(T, sha512->digest, sizeof(T)); -#ifdef USE_SLOW_SHA2 +#ifdef USE_SLOW_SHA512 /* over twice as small, but 50% slower */ /* 80 operations, not unrolled */ for (j = 0; j < 80; j += 16) { @@ -464,10 +472,9 @@ static int _Transform(wc_Sha512* sha512) R( 8); R( 9); R(10); R(11); R(12); R(13); R(14); R(15); } -#endif /* USE_SLOW_SHA2 */ +#endif /* USE_SLOW_SHA512 */ /* Add the working vars back into digest */ - sha512->digest[0] += a(0); sha512->digest[1] += b(0); sha512->digest[2] += c(0);